1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <unistd.h> 27 #include <sys/types.h> 28 #ifdef MAJOR_IN_SYSMACROS 29 #include <sys/sysmacros.h> 30 #endif 31 #include <sys/stat.h> 32 #include <fcntl.h> 33 #if HAVE_ALLOCA_H 34 # include <alloca.h> 35 #endif 36 #include <sys/wait.h> 37 38 #include "CUnit/Basic.h" 39 40 #include "amdgpu_test.h" 41 #include "amdgpu_drm.h" 42 #include "amdgpu_internal.h" 43 #include "util_math.h" 44 45 static amdgpu_device_handle device_handle; 46 static uint32_t major_version; 47 static uint32_t minor_version; 48 static uint32_t family_id; 49 static uint32_t chip_id; 50 static uint32_t chip_rev; 51 52 static void amdgpu_query_info_test(void); 53 static void amdgpu_command_submission_gfx(void); 54 static void amdgpu_command_submission_compute(void); 55 static void amdgpu_command_submission_multi_fence(void); 56 static void amdgpu_command_submission_sdma(void); 57 static void amdgpu_userptr_test(void); 58 static void amdgpu_semaphore_test(void); 59 static void amdgpu_sync_dependency_test(void); 60 static void amdgpu_bo_eviction_test(void); 61 static void amdgpu_compute_dispatch_test(void); 62 static void amdgpu_gfx_dispatch_test(void); 63 static void amdgpu_draw_test(void); 64 static void amdgpu_gpu_reset_test(void); 65 static void amdgpu_stable_pstate_test(void); 66 67 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); 68 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); 69 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type); 70 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 71 unsigned ip_type, 72 int instance, int pm4_dw, uint32_t *pm4_src, 73 int res_cnt, amdgpu_bo_handle *resources, 74 struct amdgpu_cs_ib_info *ib_info, 75 struct amdgpu_cs_request *ibs_request); 76 77 CU_TestInfo basic_tests[] = { 78 { "Query Info Test", amdgpu_query_info_test }, 79 { "Userptr Test", amdgpu_userptr_test }, 80 { "bo eviction Test", amdgpu_bo_eviction_test }, 81 { "Command submission Test (GFX)", amdgpu_command_submission_gfx }, 82 { "Command submission Test (Compute)", amdgpu_command_submission_compute }, 83 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, 84 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, 85 { "SW semaphore Test", amdgpu_semaphore_test }, 
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0

#define SDMA_OPCODE_ATOMIC  10
#	define SDMA_ATOMIC_LOOP(x)    ((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define SDMA_ATOMIC_TMZ(x)     ((x) << 2)
	/* 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#	define SDMA_ATOMIC_OPCODE(x)  ((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
	 * same as Packet 3
	 */

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define	PACKET2_PAD_SHIFT	0
#define	PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define PACKET3_NOP	0x10

#define PACKET3_WRITE_DATA	0x37
#define	WRITE_DATA_DST_SEL(x)	((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#define	WR_ONE_ADDR	(1 << 16)
#define	WR_CONFIRM	(1 << 20)
#define	WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
#define	WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */

#define PACKET3_ATOMIC_MEM	0x1E
#define	TC_OP_ATOMIC_CMPSWAP_RTN_32	0x00000008
#define	ATOMIC_MEM_COMMAND(x)	((x) << 8)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#define	ATOMIC_MEM_CACHEPOLICY(x)	((x) << 25)
	/* 0 - lru.
	 * 1 - stream.
	 */
#define	ATOMIC_MEM_ENGINESEL(x)	((x) << 30)
	/* 0 - micro_engine.
	 */

#define PACKET3_DMA_DATA	0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7.
COMMAND [30:21] | BYTE_COUNT [20:0] 201 */ 202 /* CONTROL */ 203 # define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) 204 /* 0 - ME 205 * 1 - PFP 206 */ 207 # define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) 208 /* 0 - LRU 209 * 1 - Stream 210 * 2 - Bypass 211 */ 212 # define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15) 213 # define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) 214 /* 0 - DST_ADDR using DAS 215 * 1 - GDS 216 * 3 - DST_ADDR using L2 217 */ 218 # define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) 219 /* 0 - LRU 220 * 1 - Stream 221 * 2 - Bypass 222 */ 223 # define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27) 224 # define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) 225 /* 0 - SRC_ADDR using SAS 226 * 1 - GDS 227 * 2 - DATA 228 * 3 - SRC_ADDR using L2 229 */ 230 # define PACKET3_DMA_DATA_CP_SYNC (1 << 31) 231 /* COMMAND */ 232 # define PACKET3_DMA_DATA_DIS_WC (1 << 21) 233 # define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22) 234 /* 0 - none 235 * 1 - 8 in 16 236 * 2 - 8 in 32 237 * 3 - 8 in 64 238 */ 239 # define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24) 240 /* 0 - none 241 * 1 - 8 in 16 242 * 2 - 8 in 32 243 * 3 - 8 in 64 244 */ 245 # define PACKET3_DMA_DATA_CMD_SAS (1 << 26) 246 /* 0 - memory 247 * 1 - register 248 */ 249 # define PACKET3_DMA_DATA_CMD_DAS (1 << 27) 250 /* 0 - memory 251 * 1 - register 252 */ 253 # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) 254 # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) 255 # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) 256 257 #define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \ 258 (((b) & 0x1) << 26) | \ 259 (((t) & 0x1) << 23) | \ 260 (((s) & 0x1) << 22) | \ 261 (((cnt) & 0xFFFFF) << 0)) 262 #define SDMA_OPCODE_COPY_SI 3 263 #define SDMA_OPCODE_CONSTANT_FILL_SI 13 264 #define SDMA_NOP_SI 0xf 265 #define GFX_COMPUTE_NOP_SI 0x80000000 266 #define PACKET3_DMA_DATA_SI 0x41 267 # define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27) 268 /* 0 - ME 269 * 1 - PFP 270 */ 271 # define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20) 272 /* 0 - DST_ADDR using DAS 273 * 1 - GDS 274 * 3 - DST_ADDR using L2 275 */ 276 # define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29) 277 /* 0 - SRC_ADDR using SAS 278 * 1 - GDS 279 * 2 - DATA 280 * 3 - SRC_ADDR using L2 281 */ 282 # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) 283 284 285 #define PKT3_CONTEXT_CONTROL 0x28 286 #define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 287 #define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) 288 #define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) 289 290 #define PKT3_CLEAR_STATE 0x12 291 292 #define PKT3_SET_SH_REG 0x76 293 #define PACKET3_SET_SH_REG_START 0x00002c00 294 295 #define PKT3_SET_SH_REG_INDEX 0x9B 296 297 #define PACKET3_DISPATCH_DIRECT 0x15 298 #define PACKET3_EVENT_WRITE 0x46 299 #define PACKET3_ACQUIRE_MEM 0x58 300 #define PACKET3_SET_CONTEXT_REG 0x69 301 #define PACKET3_SET_UCONFIG_REG 0x79 302 #define PACKET3_DRAW_INDEX_AUTO 0x2D 303 /* gfx 8 */ 304 #define mmCOMPUTE_PGM_LO 0x2e0c 305 #define mmCOMPUTE_PGM_RSRC1 0x2e12 306 #define mmCOMPUTE_TMPRING_SIZE 0x2e18 307 #define mmCOMPUTE_USER_DATA_0 0x2e40 308 #define mmCOMPUTE_USER_DATA_1 0x2e41 309 #define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 310 #define mmCOMPUTE_NUM_THREAD_X 0x2e07 311 312 313 314 #define SWAP_32(num) (((num & 0xff000000) >> 24) | \ 315 ((num & 0x0000ff00) << 8) | \ 316 ((num & 0x00ff0000) >> 8) | \ 317 ((num & 0x000000ff) << 24)) 318 319 320 /* Shader code 321 * void main() 322 { 323 324 float x = some_input; 325 for (unsigned i = 0; i < 1000000; 
i++) 326 x = sin(x); 327 328 u[0] = 42u; 329 } 330 */ 331 332 static uint32_t shader_bin[] = { 333 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), 334 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), 335 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), 336 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) 337 }; 338 339 #define CODE_OFFSET 512 340 #define DATA_OFFSET 1024 341 342 enum cs_type { 343 CS_BUFFERCLEAR, 344 CS_BUFFERCOPY, 345 CS_HANG, 346 CS_HANG_SLOW 347 }; 348 349 static const uint32_t bufferclear_cs_shader_gfx9[] = { 350 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 351 0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206, 352 0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000, 353 0xbf810000 354 }; 355 356 static const uint32_t bufferclear_cs_shader_gfx10[] = { 357 0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205, 358 0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004, 359 0xBF810000 360 }; 361 362 static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { 363 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, 364 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, 365 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 }, 366 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 }, 367 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 } 368 }; 369 370 static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5; 371 372 static const uint32_t buffercopy_cs_shader_gfx9[] = { 373 0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08, 374 0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70, 375 0xe01c2000, 0x80010200, 0xbf810000 376 }; 377 378 static const uint32_t buffercopy_cs_shader_gfx10[] = { 379 0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201, 380 0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000 381 }; 382 383 static const uint32_t preamblecache_gfx9[] = { 384 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 385 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 386 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, 387 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 388 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 389 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 390 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 391 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 392 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20, 393 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 394 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0, 395 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 396 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 397 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 398 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 399 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 400 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 401 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0, 402 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0, 403 0xc0017900, 0x24b, 0x0 404 }; 405 406 static const uint32_t preamblecache_gfx10[] = { 407 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 408 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, 409 0xc0026900, 0xb4, 0x0, 
0x3f800000, 0xc0016900, 0x103, 0x0, 410 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, 411 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, 412 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, 413 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, 414 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 415 0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20, 416 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, 417 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0, 418 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, 419 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 420 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, 421 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, 422 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, 423 0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2, 424 0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0, 425 0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff, 426 0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0, 427 0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0, 428 0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 429 0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 430 }; 431 432 enum ps_type { 433 PS_CONST, 434 PS_TEX, 435 PS_HANG, 436 PS_HANG_SLOW 437 }; 438 439 static const uint32_t ps_const_shader_gfx9[] = { 440 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 441 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 442 0xC4001C0F, 0x00000100, 0xBF810000 443 }; 444 445 static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6; 446 447 static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = { 448 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 449 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 }, 450 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 }, 451 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 }, 452 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 }, 453 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 }, 454 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 }, 455 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 }, 456 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 }, 457 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 } 458 } 459 }; 460 461 static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = { 462 0x00000004 463 }; 464 465 static const uint32_t ps_num_sh_registers_gfx9 = 2; 466 467 static const uint32_t ps_const_sh_registers_gfx9[][2] = { 468 {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 }, 469 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 470 }; 471 472 static const uint32_t ps_num_context_registers_gfx9 = 7; 473 474 static const uint32_t ps_const_context_reg_gfx9[][2] = { 475 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 476 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 }, 477 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 478 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 479 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 
0x00000000 }, 480 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 481 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 482 }; 483 484 static const uint32_t ps_const_shader_gfx10[] = { 485 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203, 486 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 487 0xF8001C0F, 0x00000100, 0xBF810000 488 }; 489 490 static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6; 491 492 static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = { 493 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, 494 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 }, 495 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 }, 496 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 }, 497 { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, 498 { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 }, 499 { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 }, 500 { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 }, 501 { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 }, 502 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 } 503 } 504 }; 505 506 static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = { 507 0x00000004 508 }; 509 510 static const uint32_t ps_num_sh_registers_gfx10 = 2; 511 512 static const uint32_t ps_const_sh_registers_gfx10[][2] = { 513 {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 }, 514 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } 515 }; 516 517 static const uint32_t ps_tex_shader_gfx9[] = { 518 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 519 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, 520 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000, 521 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 522 0x00000100, 0xBF810000 523 }; 524 525 static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = { 526 0x0000000B 527 }; 528 529 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6; 530 531 static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = { 532 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 }, 533 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 }, 534 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 }, 535 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 }, 536 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 537 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 538 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 539 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 540 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 }, 541 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 } 542 } 543 }; 544 545 static const uint32_t ps_tex_sh_registers_gfx9[][2] = { 546 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 }, 547 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 } 548 }; 549 550 static const uint32_t ps_tex_context_reg_gfx9[][2] = { 551 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 }, 552 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 }, 553 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F }, 554 
{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 }, 555 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 }, 556 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */}, 557 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } 558 }; 559 560 static const uint32_t ps_tex_shader_gfx10[] = { 561 0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000, 562 0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A, 563 0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70, 564 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 565 0xF8001C0F, 0x00000100, 0xBF810000 566 }; 567 568 static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = { 569 0x0000000C 570 }; 571 572 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6; 573 574 static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = { 575 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, 576 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 }, 577 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 }, 578 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 }, 579 { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, 580 { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 581 { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 582 { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 583 { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, 584 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 } 585 } 586 }; 587 588 static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 589 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 590 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, 591 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080, 592 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003, 593 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101, 594 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903, 595 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100, 596 0xC400020F, 0x05060403, 0xBF810000 597 }; 598 599 static const uint32_t vs_RectPosTexFast_shader_gfx10[] = { 600 0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206, 601 0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200, 602 0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207, 603 0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001, 604 0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002, 605 0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209, 606 0xF80008CF, 0x05030100, 0xF800020F, 0x05060402, 607 0xBF810000 608 }; 609 610 static const uint32_t cached_cmd_gfx9[] = { 611 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 612 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 613 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 614 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12, 615 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 616 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 617 0xc0026900, 0x292, 0x20, 0x60201b8, 618 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 619 }; 620 621 static const uint32_t cached_cmd_gfx10[] = { 622 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 623 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, 624 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, 625 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18, 626 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, 627 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, 628 0xc0026900, 0x292, 0x20, 0x6020000, 629 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 630 }; 631 632 unsigned int 
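/* Raw pixel-shader machine code used by the hang tests; the leading
 * 0xFFFFFFFF dword is presumably an intentionally invalid instruction. */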
memcpy_ps_hang[] = { 633 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 634 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, 635 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002, 636 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000, 637 0xF800180F, 0x03020100, 0xBF810000 638 }; 639 640 struct amdgpu_test_shader { 641 uint32_t *shader; 642 uint32_t header_length; 643 uint32_t body_length; 644 uint32_t foot_length; 645 }; 646 647 unsigned int memcpy_cs_hang_slow_ai_codes[] = { 648 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100, 649 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000 650 }; 651 652 struct amdgpu_test_shader memcpy_cs_hang_slow_ai = { 653 memcpy_cs_hang_slow_ai_codes, 654 4, 655 3, 656 1 657 }; 658 659 unsigned int memcpy_cs_hang_slow_rv_codes[] = { 660 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100, 661 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000 662 }; 663 664 struct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 665 memcpy_cs_hang_slow_rv_codes, 666 4, 667 3, 668 1 669 }; 670 671 unsigned int memcpy_cs_hang_slow_nv_codes[] = { 672 0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100, 673 0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000 674 }; 675 676 struct amdgpu_test_shader memcpy_cs_hang_slow_nv = { 677 memcpy_cs_hang_slow_nv_codes, 678 4, 679 3, 680 1 681 }; 682 683 unsigned int memcpy_ps_hang_slow_ai_codes[] = { 684 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 685 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 686 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000, 687 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f, 688 0x03020100, 0xbf810000 689 }; 690 691 struct amdgpu_test_shader memcpy_ps_hang_slow_ai = { 692 memcpy_ps_hang_slow_ai_codes, 693 7, 694 2, 695 9 696 }; 697 698 int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size, 699 unsigned alignment, unsigned heap, uint64_t alloc_flags, 700 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu, 701 uint64_t *mc_address, 702 amdgpu_va_handle *va_handle) 703 { 704 struct amdgpu_bo_alloc_request request = {}; 705 amdgpu_bo_handle buf_handle; 706 amdgpu_va_handle handle; 707 uint64_t vmc_addr; 708 int r; 709 710 request.alloc_size = size; 711 request.phys_alignment = alignment; 712 request.preferred_heap = heap; 713 request.flags = alloc_flags; 714 715 r = amdgpu_bo_alloc(dev, &request, &buf_handle); 716 if (r) 717 return r; 718 719 r = amdgpu_va_range_alloc(dev, 720 amdgpu_gpu_va_range_general, 721 size, alignment, 0, &vmc_addr, 722 &handle, 0); 723 if (r) 724 goto error_va_alloc; 725 726 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr, 727 AMDGPU_VM_PAGE_READABLE | 728 AMDGPU_VM_PAGE_WRITEABLE | 729 AMDGPU_VM_PAGE_EXECUTABLE | 730 mapping_flags, 731 AMDGPU_VA_OP_MAP); 732 if (r) 733 goto error_va_map; 734 735 r = amdgpu_bo_cpu_map(buf_handle, cpu); 736 if (r) 737 goto error_cpu_map; 738 739 *bo = buf_handle; 740 *mc_address = vmc_addr; 741 *va_handle = handle; 742 743 return 0; 744 745 error_cpu_map: 746 amdgpu_bo_cpu_unmap(buf_handle); 747 748 error_va_map: 749 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP); 750 751 error_va_alloc: 752 amdgpu_bo_free(buf_handle); 753 return r; 754 } 755 756 757 758 CU_BOOL suite_basic_tests_enable(void) 759 { 760 761 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 762 &minor_version, &device_handle)) 763 return CU_FALSE; 764 765 766 family_id = device_handle->info.family_id; 767 chip_id = device_handle->info.chip_external_rev; 768 chip_rev = device_handle->info.chip_rev; 769 770 if 
(amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the gfx engine basic test cases on ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (GFX)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (Multi-Fence)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Sync dependency Test",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: try to run this test program as root.\n",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] =
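	/* PACKET3 opcode 0x84, i.e. IT_INCREMENT_CE_COUNTER, count 0 = one payload dword: */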
0xc0008400; 896 ptr[i++] = 1; 897 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 898 ib_info[0].size = i; 899 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 900 901 /* IT_WAIT_ON_CE_COUNTER */ 902 ptr = ib_result_cpu; 903 ptr[0] = 0xc0008600; 904 ptr[1] = 0x00000001; 905 ib_info[1].ib_mc_address = ib_result_mc_address; 906 ib_info[1].size = 2; 907 908 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 909 ibs_request.number_of_ibs = 2; 910 ibs_request.ibs = ib_info; 911 ibs_request.resources = bo_list; 912 ibs_request.fence_info.handle = NULL; 913 914 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1); 915 916 CU_ASSERT_EQUAL(r, 0); 917 918 fence_status.context = context_handle; 919 fence_status.ip_type = AMDGPU_HW_IP_GFX; 920 fence_status.ip_instance = 0; 921 fence_status.fence = ibs_request.seq_no; 922 923 r = amdgpu_cs_query_fence_status(&fence_status, 924 AMDGPU_TIMEOUT_INFINITE, 925 0, &expired); 926 CU_ASSERT_EQUAL(r, 0); 927 928 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 929 ib_result_mc_address, 4096); 930 CU_ASSERT_EQUAL(r, 0); 931 932 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 933 ib_result_ce_mc_address, 4096); 934 CU_ASSERT_EQUAL(r, 0); 935 936 r = amdgpu_bo_list_destroy(bo_list); 937 CU_ASSERT_EQUAL(r, 0); 938 939 r = amdgpu_cs_ctx_free(context_handle); 940 CU_ASSERT_EQUAL(r, 0); 941 942 } 943 944 static void amdgpu_command_submission_gfx_shared_ib(void) 945 { 946 amdgpu_context_handle context_handle; 947 amdgpu_bo_handle ib_result_handle; 948 void *ib_result_cpu; 949 uint64_t ib_result_mc_address; 950 struct amdgpu_cs_request ibs_request = {0}; 951 struct amdgpu_cs_ib_info ib_info[2]; 952 struct amdgpu_cs_fence fence_status = {0}; 953 uint32_t *ptr; 954 uint32_t expired; 955 amdgpu_bo_list_handle bo_list; 956 amdgpu_va_handle va_handle; 957 int r, i = 0; 958 struct drm_amdgpu_info_hw_ip info; 959 960 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 961 CU_ASSERT_EQUAL(r, 0); 962 963 if (info.hw_ip_version_major >= 11) 964 return; 965 966 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 967 CU_ASSERT_EQUAL(r, 0); 968 969 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 970 AMDGPU_GEM_DOMAIN_GTT, 0, 971 &ib_result_handle, &ib_result_cpu, 972 &ib_result_mc_address, &va_handle); 973 CU_ASSERT_EQUAL(r, 0); 974 975 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 976 &bo_list); 977 CU_ASSERT_EQUAL(r, 0); 978 979 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 980 981 /* IT_SET_CE_DE_COUNTERS */ 982 ptr = ib_result_cpu; 983 if (family_id != AMDGPU_FAMILY_SI) { 984 ptr[i++] = 0xc0008900; 985 ptr[i++] = 0; 986 } 987 ptr[i++] = 0xc0008400; 988 ptr[i++] = 1; 989 ib_info[0].ib_mc_address = ib_result_mc_address; 990 ib_info[0].size = i; 991 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 992 993 ptr = (uint32_t *)ib_result_cpu + 4; 994 ptr[0] = 0xc0008600; 995 ptr[1] = 0x00000001; 996 ib_info[1].ib_mc_address = ib_result_mc_address + 16; 997 ib_info[1].size = 2; 998 999 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 1000 ibs_request.number_of_ibs = 2; 1001 ibs_request.ibs = ib_info; 1002 ibs_request.resources = bo_list; 1003 ibs_request.fence_info.handle = NULL; 1004 1005 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 1006 1007 CU_ASSERT_EQUAL(r, 0); 1008 1009 fence_status.context = context_handle; 1010 fence_status.ip_type = AMDGPU_HW_IP_GFX; 1011 fence_status.ip_instance = 0; 1012 fence_status.fence = ibs_request.seq_no; 1013 1014 r = amdgpu_cs_query_fence_status(&fence_status, 1015 AMDGPU_TIMEOUT_INFINITE, 
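	/* flags = 0: plain relative-timeout wait (AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE not set) */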
0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run the 2x2 loop nest to test all four mapping combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/*
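			 * bo2 is the copy destination; to make sure the SDMA copy really lands,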
clear bo2 */ 1128 memset((void*)bo2_cpu, 0, sdma_write_length); 1129 1130 resources[0] = bo1; 1131 resources[1] = bo2; 1132 resources[2] = vram_max[loop2]; 1133 resources[3] = gtt_max[loop2]; 1134 1135 /* fulfill PM4: test DMA copy linear */ 1136 i = j = 0; 1137 if (family_id == AMDGPU_FAMILY_SI) { 1138 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, 1139 sdma_write_length); 1140 pm4[i++] = 0xffffffff & bo2_mc; 1141 pm4[i++] = 0xffffffff & bo1_mc; 1142 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1143 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1144 } else { 1145 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); 1146 if (family_id >= AMDGPU_FAMILY_AI) 1147 pm4[i++] = sdma_write_length - 1; 1148 else 1149 pm4[i++] = sdma_write_length; 1150 pm4[i++] = 0; 1151 pm4[i++] = 0xffffffff & bo1_mc; 1152 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32; 1153 pm4[i++] = 0xffffffff & bo2_mc; 1154 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32; 1155 } 1156 1157 amdgpu_test_exec_cs_helper(context_handle, 1158 AMDGPU_HW_IP_DMA, 0, 1159 i, pm4, 1160 4, resources, 1161 ib_info, ibs_request); 1162 1163 /* verify if SDMA test result meets with expected */ 1164 i = 0; 1165 while(i < sdma_write_length) { 1166 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa); 1167 } 1168 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, 1169 sdma_write_length); 1170 CU_ASSERT_EQUAL(r, 0); 1171 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc, 1172 sdma_write_length); 1173 CU_ASSERT_EQUAL(r, 0); 1174 loop2++; 1175 } 1176 loop2 = 0; 1177 loop1++; 1178 } 1179 amdgpu_bo_free(vram_max[0]); 1180 amdgpu_bo_free(vram_max[1]); 1181 amdgpu_bo_free(gtt_max[0]); 1182 amdgpu_bo_free(gtt_max[1]); 1183 /* clean resources */ 1184 free(resources); 1185 free(ibs_request); 1186 free(ib_info); 1187 free(pm4); 1188 1189 /* end of test */ 1190 r = amdgpu_cs_ctx_free(context_handle); 1191 CU_ASSERT_EQUAL(r, 0); 1192 } 1193 1194 1195 static void amdgpu_command_submission_gfx(void) 1196 { 1197 /* write data using the CP */ 1198 amdgpu_command_submission_gfx_cp_write_data(); 1199 /* const fill using the CP */ 1200 amdgpu_command_submission_gfx_cp_const_fill(); 1201 /* copy data using the CP */ 1202 amdgpu_command_submission_gfx_cp_copy_data(); 1203 /* separate IB buffers for multi-IB submission */ 1204 amdgpu_command_submission_gfx_separate_ibs(); 1205 /* shared IB buffer for multi-IB submission */ 1206 amdgpu_command_submission_gfx_shared_ib(); 1207 } 1208 1209 static void amdgpu_semaphore_test(void) 1210 { 1211 amdgpu_context_handle context_handle[2]; 1212 amdgpu_semaphore_handle sem; 1213 amdgpu_bo_handle ib_result_handle[2]; 1214 void *ib_result_cpu[2]; 1215 uint64_t ib_result_mc_address[2]; 1216 struct amdgpu_cs_request ibs_request[2] = {0}; 1217 struct amdgpu_cs_ib_info ib_info[2] = {0}; 1218 struct amdgpu_cs_fence fence_status = {0}; 1219 uint32_t *ptr; 1220 uint32_t expired; 1221 uint32_t sdma_nop, gfx_nop; 1222 amdgpu_bo_list_handle bo_list[2]; 1223 amdgpu_va_handle va_handle[2]; 1224 int r, i; 1225 struct amdgpu_gpu_info gpu_info = {0}; 1226 unsigned gc_ip_type; 1227 1228 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 1229 CU_ASSERT_EQUAL(r, 0); 1230 1231 gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ? 
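		/* no graphics pipe (CPG removed): only the compute rings are usable */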
1232 AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX; 1233 1234 if (family_id == AMDGPU_FAMILY_SI) { 1235 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0); 1236 gfx_nop = GFX_COMPUTE_NOP_SI; 1237 } else { 1238 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP); 1239 gfx_nop = GFX_COMPUTE_NOP; 1240 } 1241 1242 r = amdgpu_cs_create_semaphore(&sem); 1243 CU_ASSERT_EQUAL(r, 0); 1244 for (i = 0; i < 2; i++) { 1245 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]); 1246 CU_ASSERT_EQUAL(r, 0); 1247 1248 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1249 AMDGPU_GEM_DOMAIN_GTT, 0, 1250 &ib_result_handle[i], &ib_result_cpu[i], 1251 &ib_result_mc_address[i], &va_handle[i]); 1252 CU_ASSERT_EQUAL(r, 0); 1253 1254 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i], 1255 NULL, &bo_list[i]); 1256 CU_ASSERT_EQUAL(r, 0); 1257 } 1258 1259 /* 1. same context different engine */ 1260 ptr = ib_result_cpu[0]; 1261 ptr[0] = sdma_nop; 1262 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1263 ib_info[0].size = 1; 1264 1265 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA; 1266 ibs_request[0].number_of_ibs = 1; 1267 ibs_request[0].ibs = &ib_info[0]; 1268 ibs_request[0].resources = bo_list[0]; 1269 ibs_request[0].fence_info.handle = NULL; 1270 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1271 CU_ASSERT_EQUAL(r, 0); 1272 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem); 1273 CU_ASSERT_EQUAL(r, 0); 1274 1275 r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 1276 CU_ASSERT_EQUAL(r, 0); 1277 ptr = ib_result_cpu[1]; 1278 ptr[0] = gfx_nop; 1279 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1280 ib_info[1].size = 1; 1281 1282 ibs_request[1].ip_type = gc_ip_type; 1283 ibs_request[1].number_of_ibs = 1; 1284 ibs_request[1].ibs = &ib_info[1]; 1285 ibs_request[1].resources = bo_list[1]; 1286 ibs_request[1].fence_info.handle = NULL; 1287 1288 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1); 1289 CU_ASSERT_EQUAL(r, 0); 1290 1291 fence_status.context = context_handle[0]; 1292 fence_status.ip_type = gc_ip_type; 1293 fence_status.ip_instance = 0; 1294 fence_status.fence = ibs_request[1].seq_no; 1295 r = amdgpu_cs_query_fence_status(&fence_status, 1296 AMDGPU_TIMEOUT_INFINITE, 0, &expired); 1297 CU_ASSERT_EQUAL(r, 0); 1298 CU_ASSERT_EQUAL(expired, true); 1299 1300 /* 2. 
same engine different context */ 1301 ptr = ib_result_cpu[0]; 1302 ptr[0] = gfx_nop; 1303 ib_info[0].ib_mc_address = ib_result_mc_address[0]; 1304 ib_info[0].size = 1; 1305 1306 ibs_request[0].ip_type = gc_ip_type; 1307 ibs_request[0].number_of_ibs = 1; 1308 ibs_request[0].ibs = &ib_info[0]; 1309 ibs_request[0].resources = bo_list[0]; 1310 ibs_request[0].fence_info.handle = NULL; 1311 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1); 1312 CU_ASSERT_EQUAL(r, 0); 1313 r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem); 1314 CU_ASSERT_EQUAL(r, 0); 1315 1316 r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem); 1317 CU_ASSERT_EQUAL(r, 0); 1318 ptr = ib_result_cpu[1]; 1319 ptr[0] = gfx_nop; 1320 ib_info[1].ib_mc_address = ib_result_mc_address[1]; 1321 ib_info[1].size = 1; 1322 1323 ibs_request[1].ip_type = gc_ip_type; 1324 ibs_request[1].number_of_ibs = 1; 1325 ibs_request[1].ibs = &ib_info[1]; 1326 ibs_request[1].resources = bo_list[1]; 1327 ibs_request[1].fence_info.handle = NULL; 1328 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1); 1329 1330 CU_ASSERT_EQUAL(r, 0); 1331 1332 fence_status.context = context_handle[1]; 1333 fence_status.ip_type = gc_ip_type; 1334 fence_status.ip_instance = 0; 1335 fence_status.fence = ibs_request[1].seq_no; 1336 r = amdgpu_cs_query_fence_status(&fence_status, 1337 AMDGPU_TIMEOUT_INFINITE, 0, &expired); 1338 CU_ASSERT_EQUAL(r, 0); 1339 CU_ASSERT_EQUAL(expired, true); 1340 1341 for (i = 0; i < 2; i++) { 1342 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i], 1343 ib_result_mc_address[i], 4096); 1344 CU_ASSERT_EQUAL(r, 0); 1345 1346 r = amdgpu_bo_list_destroy(bo_list[i]); 1347 CU_ASSERT_EQUAL(r, 0); 1348 1349 r = amdgpu_cs_ctx_free(context_handle[i]); 1350 CU_ASSERT_EQUAL(r, 0); 1351 } 1352 1353 r = amdgpu_cs_destroy_semaphore(sem); 1354 CU_ASSERT_EQUAL(r, 0); 1355 } 1356 1357 static void amdgpu_command_submission_compute_nop(void) 1358 { 1359 amdgpu_context_handle context_handle; 1360 amdgpu_bo_handle ib_result_handle; 1361 void *ib_result_cpu; 1362 uint64_t ib_result_mc_address; 1363 struct amdgpu_cs_request ibs_request; 1364 struct amdgpu_cs_ib_info ib_info; 1365 struct amdgpu_cs_fence fence_status; 1366 uint32_t *ptr; 1367 uint32_t expired; 1368 int r, instance; 1369 amdgpu_bo_list_handle bo_list; 1370 amdgpu_va_handle va_handle; 1371 struct drm_amdgpu_info_hw_ip info; 1372 1373 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 1374 CU_ASSERT_EQUAL(r, 0); 1375 1376 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1377 CU_ASSERT_EQUAL(r, 0); 1378 1379 for (instance = 0; (1 << instance) & info.available_rings; instance++) { 1380 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 1381 AMDGPU_GEM_DOMAIN_GTT, 0, 1382 &ib_result_handle, &ib_result_cpu, 1383 &ib_result_mc_address, &va_handle); 1384 CU_ASSERT_EQUAL(r, 0); 1385 1386 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 1387 &bo_list); 1388 CU_ASSERT_EQUAL(r, 0); 1389 1390 ptr = ib_result_cpu; 1391 memset(ptr, 0, 16); 1392 ptr[0]=PACKET3(PACKET3_NOP, 14); 1393 1394 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 1395 ib_info.ib_mc_address = ib_result_mc_address; 1396 ib_info.size = 16; 1397 1398 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 1399 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE; 1400 ibs_request.ring = instance; 1401 ibs_request.number_of_ibs = 1; 1402 ibs_request.ibs = &ib_info; 1403 ibs_request.resources = bo_list; 1404 
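		/* fence_info.handle stays NULL: no user fence BO is used; completion
		 * is detected below via amdgpu_cs_query_fence_status() instead */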
ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits until the IB has completed.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle =
NULL; 1513 1514 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt); 1515 all_res[res_cnt] = ib_result_handle; 1516 1517 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res, 1518 NULL, &ibs_request->resources); 1519 CU_ASSERT_EQUAL(r, 0); 1520 1521 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1522 1523 /* submit CS */ 1524 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1); 1525 CU_ASSERT_EQUAL(r, 0); 1526 1527 r = amdgpu_bo_list_destroy(ibs_request->resources); 1528 CU_ASSERT_EQUAL(r, 0); 1529 1530 fence_status.ip_type = ip_type; 1531 fence_status.ip_instance = 0; 1532 fence_status.ring = ibs_request->ring; 1533 fence_status.context = context_handle; 1534 fence_status.fence = ibs_request->seq_no; 1535 1536 /* wait for IB accomplished */ 1537 r = amdgpu_cs_query_fence_status(&fence_status, 1538 AMDGPU_TIMEOUT_INFINITE, 1539 0, &expired); 1540 CU_ASSERT_EQUAL(r, 0); 1541 CU_ASSERT_EQUAL(expired, true); 1542 1543 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 1544 ib_result_mc_address, 4096); 1545 CU_ASSERT_EQUAL(r, 0); 1546 } 1547 1548 static void 1549 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle, 1550 unsigned ip_type, int instance, int pm4_dw, 1551 uint32_t *pm4_src, int res_cnt, 1552 amdgpu_bo_handle *resources, 1553 struct amdgpu_cs_ib_info *ib_info, 1554 struct amdgpu_cs_request *ibs_request) 1555 { 1556 amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, 1557 ip_type, instance, pm4_dw, pm4_src, 1558 res_cnt, resources, ib_info, 1559 ibs_request, false); 1560 } 1561 1562 void 1563 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle 1564 device, unsigned 1565 ip_type, bool secure) 1566 { 1567 const int sdma_write_length = 128; 1568 const int pm4_dw = 256; 1569 amdgpu_context_handle context_handle; 1570 amdgpu_bo_handle bo; 1571 amdgpu_bo_handle *resources; 1572 uint32_t *pm4; 1573 struct amdgpu_cs_ib_info *ib_info; 1574 struct amdgpu_cs_request *ibs_request; 1575 uint64_t bo_mc; 1576 volatile uint32_t *bo_cpu; 1577 uint32_t bo_cpu_origin; 1578 int i, j, r, loop, ring_id; 1579 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1580 amdgpu_va_handle va_handle; 1581 struct drm_amdgpu_info_hw_ip hw_ip_info; 1582 1583 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1584 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1585 1586 ib_info = calloc(1, sizeof(*ib_info)); 1587 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1588 1589 ibs_request = calloc(1, sizeof(*ibs_request)); 1590 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1591 1592 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info); 1593 CU_ASSERT_EQUAL(r, 0); 1594 1595 for (i = 0; secure && (i < 2); i++) 1596 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED; 1597 1598 r = amdgpu_cs_ctx_create(device, &context_handle); 1599 1600 CU_ASSERT_EQUAL(r, 0); 1601 1602 /* prepare resource */ 1603 resources = calloc(1, sizeof(amdgpu_bo_handle)); 1604 CU_ASSERT_NOT_EQUAL(resources, NULL); 1605 1606 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) { 1607 loop = 0; 1608 while(loop < 2) { 1609 /* allocate UC bo for sDMA use */ 1610 r = amdgpu_bo_alloc_and_map(device, 1611 sdma_write_length * sizeof(uint32_t), 1612 4096, AMDGPU_GEM_DOMAIN_GTT, 1613 gtt_flags[loop], &bo, (void**)&bo_cpu, 1614 &bo_mc, &va_handle); 1615 CU_ASSERT_EQUAL(r, 0); 1616 1617 /* clear bo */ 1618 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t)); 1619 1620 resources[0] = bo; 1621 1622 /* fulfill PM4: test DMA write-linear */ 1623 i = j = 0; 1624 if (ip_type == AMDGPU_HW_IP_DMA) { 1625 if 
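				/* SI parts use the legacy SDMA packet encoding */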
(family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify if SDMA test result meets with expected */
			i = 0;
			if (!secure) {
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* 32-bit compare-swap atomic with return value
				 * (TC_OP_ATOMIC_CMPSWAP_RTN_32),
				 * command 1 = loop_until_compare_satisfied,
				 * cache policy 0 = lru,
				 * engine_sel 0 = micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
					    ATOMIC_MEM_COMMAND(1) |
					    ATOMIC_MEM_CACHEPOLICY(0) |
					    ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* snapshot bo_cpu[0] so the overwrite can be detected */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* 32-bit compare-swap atomic with return value
				 * (TC_OP_ATOMIC_CMPSWAP_RTN_32),
				 * loop 1 = loop_until_compare_satisfied,
				 * TMZ 1 = protected-mode access
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* DMA atomics behave unlike GFX atomics:
				 * if the compare value does not match the destination
				 * data, GFX loops until the gfx timeout (system hang),
				 * while DMA loops until a timer expires and then raises
				 * an interrupt, so the testcase cannot rely on the
				 * interrupt mechanism. We verify another way: when the
				 * compare value does not match the destination data,
				 * the source data is written to the destination buffer;
				 * otherwise the destination stays unchanged. So if
				 * bo_cpu[0] has been overwritten, the test passes.
				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* restore the reference; dest_data should now be 0x12345678,
				 * swapped in by the previous atomic
				 */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;			/* addr low */
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;	/* addr high */
				pm4[i++] = 0x87654321;				/* src data */
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;				/* cmp data */
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;				/* loop interval */
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* bo_cpu[0] should be unchanged (still 0x12345678);
				 * anything else is a failure
				 */
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
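/*
 * Hedged usage sketch: the secure variant above is exported (it is not
 * static) so that a TMZ-capable test elsewhere can drive the same
 * write/atomic sequence with encrypted buffers, presumably as:
 *
 *	amdgpu_command_submission_write_linear_helper_with_secure(
 *		device_handle, AMDGPU_HW_IP_DMA, true);
 *
 * The wrappers below keep the basic suite on the non-secure path.
 */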
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill the PM4 buffer: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify that the buffer was filled with the pattern */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
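/*
 * Illustrative sketch, not referenced above: how the count field of the
 * SDMA WRITE/CONSTANT_FILL/COPY packets is derived throughout this file.
 * Families from AMDGPU_FAMILY_AI onward encode the count minus one,
 * older non-SI parts encode the raw count, and SI carries the count in
 * the packet header itself (see the SDMA_PACKET_SI() uses).
 */
#if 0
static uint32_t sdma_count_dw(uint32_t count)
{
	/* not meaningful for AMDGPU_FAMILY_SI */
	return (family_id >= AMDGPU_FAMILY_AI) ? count - 1 : count;
}
#endif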
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		/* run the 2x2 flag combinations to test all mappings */
		loop1 = 0;
		while(loop1 < 2) {
			loop2 = 0;
			while(loop2 < 2) {
				/* allocate UC bo1 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill the PM4 buffer: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify that bo2 now holds the bo1 pattern */
				i = 0;
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop1++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
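/*
 * Note on the PACKET3_DMA_DATA field selects used by the const-fill and
 * copy helpers above (to the best reading of the PM4 encoding):
 * DST_SEL(0) and SRC_SEL(0) address memory, while SRC_SEL(2) takes the
 * embedded immediate, which is why const-fill carries 0xdeadbeaf inside
 * the packet and copy carries the bo1/bo2 GPU addresses.  CP_SYNC keeps
 * the CP from advancing until the DMA completes.
 */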
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	/* the CE/DE counter sequence below does not apply to gfx11+ */
	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
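/*
 * Hedged sketch of the wait-any mode exercised by the helper above when
 * called with wait_all=false (as the wrapper below does): a non-NULL
 * last argument to amdgpu_cs_wait_fences() receives the index of the
 * first fence that signalled, e.g.:
 */
#if 0
	uint32_t first_signalled;

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, false,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, &first_signalled);
#endif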
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

	/* fork a child which scribbles on its copy-on-write copy of pm4;
	 * the parent's buffers, including the userptr mapping, must be
	 * unaffected by the fork
	 */
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* reap the child */
	wait(NULL);
}
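/*
 * Background for the next test (hedged summary of the shader blob
 * defined earlier in this file): shader_bin is assumed to spin in a
 * long loop and then store its result through the address passed in
 * COMPUTE_USER_DATA_0/1, i.e. DATA_OFFSET in the IB bo.  The dependent
 * second submission writes 99 to the same dword, so 99 survives only if
 * the dependency really held it back until the dispatch finished.
 */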
static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040  COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *                                SGPRS = 1
	 *                                PRIORITY = 0
	 *                                FLOAT_MODE = 192 (0xc0)
	 *                                PRIV = 0
	 *                                DX10_CLAMP = 1
	 *                                DEBUG_MODE = 0
	 *                                IEEE_MODE = 0
	 *                                BULKY = 0
	 *                                CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;
	/*
	 * 00000010  COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *                                USER_SGPR = 8
	 *                                TRAP_PRESENT = 0
	 *                                TGID_X_EN = 0
	 *                                TGID_Y_EN = 0
	 *                                TGID_Z_EN = 0
	 *                                TG_SIZE_EN = 0
	 *                                TIDIG_COMP_CNT = 0
	 *                                EXCP_EN_MSB = 0
	 *                                LDS_SIZE = 0
	 *                                EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100  COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *                                   WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	/* pad the IB to a multiple of 8 DWs with NOPs */
	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	CU_ASSERT_NOT_EQUAL(ibs_request.dependencies, NULL);
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to have waited for the shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}
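/*
 * amdgpu_test_dispatch_helper() and amdgpu_test_draw_helper() used below
 * come from the shared test library (declared in amdgpu_test.h); they
 * are assumed to iterate over the available rings of the chosen IP and
 * run a complete dispatch, respectively draw, pipeline, asserting on
 * the results internally.
 */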
static void amdgpu_compute_dispatch_test(void)
{
	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_gfx_dispatch_test(void)
{
	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX);
}

static void amdgpu_draw_test(void)
{
	amdgpu_test_draw_helper(device_handle);
}

static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	/* reading the amdgpu_gpu_recover debugfs node triggers a GPU reset */
	r = read(fd, tmp, sizeof(tmp));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* make sure the GPU is functional again after the reset */
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}
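/*
 * Hedged note for the test below: the stable-pstate state set through
 * amdgpu_cs_ctx_stable_pstate() is assumed to be tied to the context,
 * so freeing the context at the end of the test is expected to drop the
 * PEAK setting again without an explicit restore to
 * AMDGPU_CTX_STABLE_PSTATE_NONE.
 */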
static void amdgpu_stable_pstate_test(void)
{
	int r;
	amdgpu_context_handle context_handle;
	uint32_t current_pstate = 0, new_pstate = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &current_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &new_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}