1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #if HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37
38 #include "CUnit/Basic.h"
39
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
44
45 static amdgpu_device_handle device_handle;
46 static uint32_t major_version;
47 static uint32_t minor_version;
48 static uint32_t family_id;
49
50 static void amdgpu_query_info_test(void);
51 static void amdgpu_command_submission_gfx(void);
52 static void amdgpu_command_submission_compute(void);
53 static void amdgpu_command_submission_multi_fence(void);
54 static void amdgpu_command_submission_sdma(void);
55 static void amdgpu_userptr_test(void);
56 static void amdgpu_semaphore_test(void);
57 static void amdgpu_sync_dependency_test(void);
58 static void amdgpu_bo_eviction_test(void);
59 static void amdgpu_compute_dispatch_test(void);
60 static void amdgpu_gfx_dispatch_test(void);
61 static void amdgpu_draw_test(void);
62 static void amdgpu_gpu_reset_test(void);
63
64 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
65 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
66 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
67 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
68 unsigned ip_type,
69 int instance, int pm4_dw, uint32_t *pm4_src,
70 int res_cnt, amdgpu_bo_handle *resources,
71 struct amdgpu_cs_ib_info *ib_info,
72 struct amdgpu_cs_request *ibs_request);
73
74 CU_TestInfo basic_tests[] = {
75 { "Query Info Test", amdgpu_query_info_test },
76 { "Userptr Test", amdgpu_userptr_test },
77 { "bo eviction Test", amdgpu_bo_eviction_test },
78 { "Command submission Test (GFX)", amdgpu_command_submission_gfx },
79 { "Command submission Test (Compute)", amdgpu_command_submission_compute },
80 { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
81 { "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
82 { "SW semaphore Test", amdgpu_semaphore_test },
83 { "Sync dependency Test", amdgpu_sync_dependency_test },
84 { "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
85 { "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
86 { "Draw Test", amdgpu_draw_test },
87 { "GPU reset Test", amdgpu_gpu_reset_test },
88 CU_TEST_INFO_NULL,
89 };
90 #define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
91 #define SDMA_PKT_HEADER_op_offset 0
92 #define SDMA_PKT_HEADER_op_mask 0x000000FF
93 #define SDMA_PKT_HEADER_op_shift 0
94 #define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
95 #define SDMA_OPCODE_CONSTANT_FILL 11
96 # define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14)
97 /* 0 = byte fill
98 * 2 = DW fill
99 */
100 #define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
101 (((sub_op) & 0xFF) << 8) | \
102 (((op) & 0xFF) << 0))
103 #define SDMA_OPCODE_WRITE 2
104 # define SDMA_WRITE_SUB_OPCODE_LINEAR 0
105 # define SDMA_WRITE_SUB_OPCODE_TILED 1
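/* For example, the write-linear tests below build their packet header as
 * SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0), which
 * packs op = 2 into bits [7:0], sub_op = 0 into bits [15:8] and the extra
 * field into bits [31:16], i.e. the header DWORD 0x00000002.
 */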
106
107 #define SDMA_OPCODE_COPY 1
108 # define SDMA_COPY_SUB_OPCODE_LINEAR 0
109
110 #define SDMA_OPCODE_ATOMIC 10
111 # define SDMA_ATOMIC_LOOP(x) ((x) << 0)
112 /* 0 - single_pass_atomic.
113 * 1 - loop_until_compare_satisfied.
114 */
115 # define SDMA_ATOMIC_TMZ(x) ((x) << 2)
116 /* 0 - non-TMZ.
117 * 1 - TMZ.
118 */
119 # define SDMA_ATOMIC_OPCODE(x) ((x) << 9)
120 /* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
121 * same as Packet 3
122 */
123
124 #define GFX_COMPUTE_NOP 0xffff1000
125 #define SDMA_NOP 0x0
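/* The semaphore test below uses GFX_COMPUTE_NOP directly as the GFX NOP DWORD
 * and wraps SDMA_NOP with SDMA_PKT_HEADER_OP(), which evaluates to 0x0.
 */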
126
127 /* PM4 */
128 #define PACKET_TYPE0 0
129 #define PACKET_TYPE1 1
130 #define PACKET_TYPE2 2
131 #define PACKET_TYPE3 3
132
133 #define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
134 #define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
135 #define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
136 #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
137 #define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
138 ((reg) & 0xFFFF) | \
139 ((n) & 0x3FFF) << 16)
140 #define CP_PACKET2 0x80000000
141 #define PACKET2_PAD_SHIFT 0
142 #define PACKET2_PAD_MASK (0x3fffffff << 0)
143
144 #define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
145
146 #define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
147 (((op) & 0xFF) << 8) | \
148 ((n) & 0x3FFF) << 16)
149 #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
150
151 /* Packet 3 types */
152 #define PACKET3_NOP 0x10
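/* Example: PACKET3(PACKET3_NOP, 14) == (3 << 30) | (0x10 << 8) | (14 << 16)
 * == 0xC00E1000; the compute NOP test below uses it to pad a 16-DWORD IB
 * (one header DWORD plus 15 payload DWORDs).
 */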
153
154 #define PACKET3_WRITE_DATA 0x37
155 #define WRITE_DATA_DST_SEL(x) ((x) << 8)
156 /* 0 - register
157 * 1 - memory (sync - via GRBM)
158 * 2 - gl2
159 * 3 - gds
160 * 4 - reserved
161 * 5 - memory (async - direct)
162 */
163 #define WR_ONE_ADDR (1 << 16)
164 #define WR_CONFIRM (1 << 20)
165 #define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
166 /* 0 - LRU
167 * 1 - Stream
168 */
169 #define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
170 /* 0 - me
171 * 1 - pfp
172 * 2 - ce
173 */
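/* For example, the CP write-linear tests below use
 * WRITE_DATA_DST_SEL(5) | WR_CONFIRM, i.e. an asynchronous direct memory
 * write with write confirmation.
 */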
174
175 #define PACKET3_ATOMIC_MEM 0x1E
176 #define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
177 #define ATOMIC_MEM_COMMAND(x) ((x) << 8)
178 /* 0 - single_pass_atomic.
179 * 1 - loop_until_compare_satisfied.
180 */
181 #define ATOMIC_MEM_CACHEPOLICY(x) ((x) << 25)
182 /* 0 - lru.
183 * 1 - stream.
184 */
185 #define ATOMIC_MEM_ENGINESEL(x) ((x) << 30)
186 /* 0 - micro_engine.
187 */
188
189 #define PACKET3_DMA_DATA 0x50
190 /* 1. header
191 * 2. CONTROL
192 * 3. SRC_ADDR_LO or DATA [31:0]
193 * 4. SRC_ADDR_HI [31:0]
194 * 5. DST_ADDR_LO [31:0]
195 * 6. DST_ADDR_HI [7:0]
196 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
197 */
198 /* CONTROL */
199 # define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
200 /* 0 - ME
201 * 1 - PFP
202 */
203 # define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
204 /* 0 - LRU
205 * 1 - Stream
206 * 2 - Bypass
207 */
208 # define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
209 # define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
210 /* 0 - DST_ADDR using DAS
211 * 1 - GDS
212 * 3 - DST_ADDR using L2
213 */
214 # define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
215 /* 0 - LRU
216 * 1 - Stream
217 * 2 - Bypass
218 */
219 # define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
220 # define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
221 /* 0 - SRC_ADDR using SAS
222 * 1 - GDS
223 * 2 - DATA
224 * 3 - SRC_ADDR using L2
225 */
226 # define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
227 /* COMMAND */
228 # define PACKET3_DMA_DATA_DIS_WC (1 << 21)
229 # define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
230 /* 0 - none
231 * 1 - 8 in 16
232 * 2 - 8 in 32
233 * 3 - 8 in 64
234 */
235 # define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
236 /* 0 - none
237 * 1 - 8 in 16
238 * 2 - 8 in 32
239 * 3 - 8 in 64
240 */
241 # define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
242 /* 0 - memory
243 * 1 - register
244 */
245 # define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
246 /* 0 - memory
247 * 1 - register
248 */
249 # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
250 # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
251 # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
252
253 #define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \
254 (((b) & 0x1) << 26) | \
255 (((t) & 0x1) << 23) | \
256 (((s) & 0x1) << 22) | \
257 (((cnt) & 0xFFFFF) << 0))
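/* SDMA_PACKET_SI() packs the opcode into bits [31:28] and the count into
 * bits [19:0]; e.g. the SI copy path below emits
 * SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, sdma_write_length).
 */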
258 #define SDMA_OPCODE_COPY_SI 3
259 #define SDMA_OPCODE_CONSTANT_FILL_SI 13
260 #define SDMA_NOP_SI 0xf
261 #define GFX_COMPUTE_NOP_SI 0x80000000
262 #define PACKET3_DMA_DATA_SI 0x41
263 # define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27)
264 /* 0 - ME
265 * 1 - PFP
266 */
267 # define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20)
268 /* 0 - DST_ADDR using DAS
269 * 1 - GDS
270 * 3 - DST_ADDR using L2
271 */
272 # define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29)
273 /* 0 - SRC_ADDR using SAS
274 * 1 - GDS
275 * 2 - DATA
276 * 3 - SRC_ADDR using L2
277 */
278 # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31)
279
280
281 #define PKT3_CONTEXT_CONTROL 0x28
282 #define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
283 #define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
284 #define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
285
286 #define PKT3_CLEAR_STATE 0x12
287
288 #define PKT3_SET_SH_REG 0x76
289 #define PACKET3_SET_SH_REG_START 0x00002c00
290
291 #define PACKET3_DISPATCH_DIRECT 0x15
292 #define PACKET3_EVENT_WRITE 0x46
293 #define PACKET3_ACQUIRE_MEM 0x58
294 #define PACKET3_SET_CONTEXT_REG 0x69
295 #define PACKET3_SET_UCONFIG_REG 0x79
296 #define PACKET3_DRAW_INDEX_AUTO 0x2D
297 /* gfx 8 */
298 #define mmCOMPUTE_PGM_LO 0x2e0c
299 #define mmCOMPUTE_PGM_RSRC1 0x2e12
300 #define mmCOMPUTE_TMPRING_SIZE 0x2e18
301 #define mmCOMPUTE_USER_DATA_0 0x2e40
302 #define mmCOMPUTE_USER_DATA_1 0x2e41
303 #define mmCOMPUTE_RESOURCE_LIMITS 0x2e15
304 #define mmCOMPUTE_NUM_THREAD_X 0x2e07
305
306
307
308 #define SWAP_32(num) (((num & 0xff000000) >> 24) | \
309 ((num & 0x0000ff00) << 8) | \
310 ((num & 0x00ff0000) >> 8) | \
311 ((num & 0x000000ff) << 24))
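/* Example: SWAP_32(0x11223344) == 0x44332211, i.e. the byte order of each
 * shader DWORD below is reversed.
 */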
312
313
314 /* Shader code
315  * void main()
316  * {
317  *
318  *     float x = some_input;
319  *     for (unsigned i = 0; i < 1000000; i++)
320  *         x = sin(x);
321  *
322  *     u[0] = 42u;
323  * }
324  */
325
326 static uint32_t shader_bin[] = {
327 SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
328 SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
329 SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
330 SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
331 };
332
333 #define CODE_OFFSET 512
334 #define DATA_OFFSET 1024
335
336 enum cs_type {
337 CS_BUFFERCLEAR,
338 CS_BUFFERCOPY,
339 CS_HANG,
340 CS_HANG_SLOW
341 };
342
343 static const uint32_t bufferclear_cs_shader_gfx9[] = {
344 0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
345 0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
346 0xBF810000
347 };
348
349 static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
350 {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
351 {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
352 {0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
353 {0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
354 {0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
355 };
356
357 static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
358
359 static const uint32_t buffercopy_cs_shader_gfx9[] = {
360 0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
361 0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
362 };
363
364 static const uint32_t preamblecache_gfx9[] = {
365 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
366 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
367 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
368 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
369 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
370 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
371 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
372 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
373 0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
374 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
375 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
376 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
377 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
378 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
379 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
380 0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
381 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
382 0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
383 0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
384 0xc0017900, 0x24b, 0x0
385 };
386
387 enum ps_type {
388 PS_CONST,
389 PS_TEX,
390 PS_HANG,
391 PS_HANG_SLOW
392 };
393
394 static const uint32_t ps_const_shader_gfx9[] = {
395 0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
396 0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
397 0xC4001C0F, 0x00000100, 0xBF810000
398 };
399
400 static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
401
402 static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
403 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
404 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
405 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
406 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
407 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
408 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
409 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
410 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
411 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
412 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
413 }
414 };
415
416 static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
417 0x00000004
418 };
419
420 static const uint32_t ps_num_sh_registers_gfx9 = 2;
421
422 static const uint32_t ps_const_sh_registers_gfx9[][2] = {
423 {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
424 {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
425 };
426
427 static const uint32_t ps_num_context_registers_gfx9 = 7;
428
429 static const uint32_t ps_const_context_reg_gfx9[][2] = {
430 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
431 {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL, 0x00000000 },
432 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
433 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
434 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
435 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
436 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
437 };
438
439 static const uint32_t ps_tex_shader_gfx9[] = {
440 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
441 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
442 0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
443 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
444 0x00000100, 0xBF810000
445 };
446
447 static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
448 0x0000000B
449 };
450
451 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
452
453 static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
454 {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
455 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
456 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
457 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
458 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
459 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
460 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
461 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
462 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
463 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
464 }
465 };
466
467 static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
468 {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
469 {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
470 };
471
472 static const uint32_t ps_tex_context_reg_gfx9[][2] = {
473 {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
474 {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL, 0x00000001 },
475 {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK, 0x0000000F },
476 {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL, 0x00000010 },
477 {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
478 {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
479 {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
480 };
481
482 static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
483 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
484 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
485 0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
486 0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
487 0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
488 0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
489 0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
490 0xC400020F, 0x05060403, 0xBF810000
491 };
492
493 static const uint32_t cached_cmd_gfx9[] = {
494 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
495 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
496 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
497 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
498 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
499 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
500 0xc0026900, 0x292, 0x20, 0x60201b8,
501 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
502 };
503
504 unsigned int memcpy_ps_hang[] = {
505 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
506 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
507 0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
508 0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
509 0xF800180F, 0x03020100, 0xBF810000
510 };
511
512 struct amdgpu_test_shader {
513 uint32_t *shader;
514 uint32_t header_length;
515 uint32_t body_length;
516 uint32_t foot_length;
517 };
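/* header_length/body_length/foot_length appear to be DWORD counts within
 * 'shader'; e.g. memcpy_cs_hang_slow_ai below splits its 8-DWORD program
 * as 4 + 3 + 1.
 */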
518
519 unsigned int memcpy_cs_hang_slow_ai_codes[] = {
520 0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
521 0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
522 };
523
524 struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
525 memcpy_cs_hang_slow_ai_codes,
526 4,
527 3,
528 1
529 };
530
531 unsigned int memcpy_cs_hang_slow_rv_codes[] = {
532 0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
533 0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
534 };
535
536 struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
537 memcpy_cs_hang_slow_rv_codes,
538 4,
539 3,
540 1
541 };
542
543 unsigned int memcpy_ps_hang_slow_ai_codes[] = {
544 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
545 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
546 0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
547 0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
548 0x03020100, 0xbf810000
549 };
550
551 struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
552 memcpy_ps_hang_slow_ai_codes,
553 7,
554 2,
555 9
556 };
557
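/* Allocate a BO of 'size' bytes in 'heap', map it into the GPU VA space with
 * the extra 'mapping_flags', and CPU-map it.  A minimal usage sketch
 * (error handling omitted; the names bo/cpu/mc/va are illustrative only):
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *	amdgpu_bo_alloc_and_map_raw(dev, 4096, 4096, AMDGPU_GEM_DOMAIN_GTT,
 *				    0, 0, &bo, &cpu, &mc, &va);
 */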
558 int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
559 unsigned alignment, unsigned heap, uint64_t alloc_flags,
560 uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
561 uint64_t *mc_address,
562 amdgpu_va_handle *va_handle)
563 {
564 struct amdgpu_bo_alloc_request request = {};
565 amdgpu_bo_handle buf_handle;
566 amdgpu_va_handle handle;
567 uint64_t vmc_addr;
568 int r;
569
570 request.alloc_size = size;
571 request.phys_alignment = alignment;
572 request.preferred_heap = heap;
573 request.flags = alloc_flags;
574
575 r = amdgpu_bo_alloc(dev, &request, &buf_handle);
576 if (r)
577 return r;
578
579 r = amdgpu_va_range_alloc(dev,
580 amdgpu_gpu_va_range_general,
581 size, alignment, 0, &vmc_addr,
582 &handle, 0);
583 if (r)
584 goto error_va_alloc;
585
586 r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
587 AMDGPU_VM_PAGE_READABLE |
588 AMDGPU_VM_PAGE_WRITEABLE |
589 AMDGPU_VM_PAGE_EXECUTABLE |
590 mapping_flags,
591 AMDGPU_VA_OP_MAP);
592 if (r)
593 goto error_va_map;
594
595 r = amdgpu_bo_cpu_map(buf_handle, cpu);
596 if (r)
597 goto error_cpu_map;
598
599 *bo = buf_handle;
600 *mc_address = vmc_addr;
601 *va_handle = handle;
602
603 return 0;
604
605 error_cpu_map:
606 amdgpu_bo_cpu_unmap(buf_handle);
607
608 error_va_map:
609 amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
610
611 error_va_alloc:
612 amdgpu_bo_free(buf_handle);
613 return r;
614 }
615
616
617
618 CU_BOOL suite_basic_tests_enable(void)
619 {
620 uint32_t asic_id;
621
622 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
623 &minor_version, &device_handle))
624 return CU_FALSE;
625
626 asic_id = device_handle->info.asic_id;
627
628 if (amdgpu_device_deinitialize(device_handle))
629 return CU_FALSE;
630
631 /* disable gfx engine basic test cases for Arcturus due to no CPG */
632 if (asic_is_arcturus(asic_id)) {
633 if (amdgpu_set_test_active("Basic Tests",
634 "Command submission Test (GFX)",
635 CU_FALSE))
636 fprintf(stderr, "test deactivation failed - %s\n",
637 CU_get_error_msg());
638
639 if (amdgpu_set_test_active("Basic Tests",
640 "Command submission Test (Multi-Fence)",
641 CU_FALSE))
642 fprintf(stderr, "test deactivation failed - %s\n",
643 CU_get_error_msg());
644
645 if (amdgpu_set_test_active("Basic Tests",
646 "Sync dependency Test",
647 CU_FALSE))
648 fprintf(stderr, "test deactivation failed - %s\n",
649 CU_get_error_msg());
650 }
651
652 return CU_TRUE;
653 }
654
655 int suite_basic_tests_init(void)
656 {
657 struct amdgpu_gpu_info gpu_info = {0};
658 int r;
659
660 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
661 &minor_version, &device_handle);
662
663 if (r) {
664 if ((r == -EACCES) && (errno == EACCES))
665 printf("\n\nError: %s. "
666 "Hint: try running this test program as root.",
667 strerror(errno));
668 return CUE_SINIT_FAILED;
669 }
670
671 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
672 if (r)
673 return CUE_SINIT_FAILED;
674
675 family_id = gpu_info.family_id;
676
677 return CUE_SUCCESS;
678 }
679
680 int suite_basic_tests_clean(void)
681 {
682 int r = amdgpu_device_deinitialize(device_handle);
683
684 if (r == 0)
685 return CUE_SUCCESS;
686 else
687 return CUE_SCLEAN_FAILED;
688 }
689
690 static void amdgpu_query_info_test(void)
691 {
692 struct amdgpu_gpu_info gpu_info = {0};
693 uint32_t version, feature;
694 int r;
695
696 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
697 CU_ASSERT_EQUAL(r, 0);
698
699 r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
700 0, &version, &feature);
701 CU_ASSERT_EQUAL(r, 0);
702 }
703
704 static void amdgpu_command_submission_gfx_separate_ibs(void)
705 {
706 amdgpu_context_handle context_handle;
707 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
708 void *ib_result_cpu, *ib_result_ce_cpu;
709 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
710 struct amdgpu_cs_request ibs_request = {0};
711 struct amdgpu_cs_ib_info ib_info[2];
712 struct amdgpu_cs_fence fence_status = {0};
713 uint32_t *ptr;
714 uint32_t expired;
715 amdgpu_bo_list_handle bo_list;
716 amdgpu_va_handle va_handle, va_handle_ce;
717 int r, i = 0;
718
719 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
720 CU_ASSERT_EQUAL(r, 0);
721
722 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
723 AMDGPU_GEM_DOMAIN_GTT, 0,
724 &ib_result_handle, &ib_result_cpu,
725 &ib_result_mc_address, &va_handle);
726 CU_ASSERT_EQUAL(r, 0);
727
728 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
729 AMDGPU_GEM_DOMAIN_GTT, 0,
730 &ib_result_ce_handle, &ib_result_ce_cpu,
731 &ib_result_ce_mc_address, &va_handle_ce);
732 CU_ASSERT_EQUAL(r, 0);
733
734 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
735 ib_result_ce_handle, &bo_list);
736 CU_ASSERT_EQUAL(r, 0);
737
738 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
739
740 /* IT_SET_CE_DE_COUNTERS */
741 ptr = ib_result_ce_cpu;
742 if (family_id != AMDGPU_FAMILY_SI) {
743 ptr[i++] = 0xc0008900;
744 ptr[i++] = 0;
745 }
746 ptr[i++] = 0xc0008400;
747 ptr[i++] = 1;
748 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
749 ib_info[0].size = i;
750 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
751
752 /* IT_WAIT_ON_CE_COUNTER */
753 ptr = ib_result_cpu;
754 ptr[0] = 0xc0008600;
755 ptr[1] = 0x00000001;
756 ib_info[1].ib_mc_address = ib_result_mc_address;
757 ib_info[1].size = 2;
758
759 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
760 ibs_request.number_of_ibs = 2;
761 ibs_request.ibs = ib_info;
762 ibs_request.resources = bo_list;
763 ibs_request.fence_info.handle = NULL;
764
765 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
766
767 CU_ASSERT_EQUAL(r, 0);
768
769 fence_status.context = context_handle;
770 fence_status.ip_type = AMDGPU_HW_IP_GFX;
771 fence_status.ip_instance = 0;
772 fence_status.fence = ibs_request.seq_no;
773
774 r = amdgpu_cs_query_fence_status(&fence_status,
775 AMDGPU_TIMEOUT_INFINITE,
776 0, &expired);
777 CU_ASSERT_EQUAL(r, 0);
778
779 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
780 ib_result_mc_address, 4096);
781 CU_ASSERT_EQUAL(r, 0);
782
783 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
784 ib_result_ce_mc_address, 4096);
785 CU_ASSERT_EQUAL(r, 0);
786
787 r = amdgpu_bo_list_destroy(bo_list);
788 CU_ASSERT_EQUAL(r, 0);
789
790 r = amdgpu_cs_ctx_free(context_handle);
791 CU_ASSERT_EQUAL(r, 0);
792
793 }
794
795 static void amdgpu_command_submission_gfx_shared_ib(void)
796 {
797 amdgpu_context_handle context_handle;
798 amdgpu_bo_handle ib_result_handle;
799 void *ib_result_cpu;
800 uint64_t ib_result_mc_address;
801 struct amdgpu_cs_request ibs_request = {0};
802 struct amdgpu_cs_ib_info ib_info[2];
803 struct amdgpu_cs_fence fence_status = {0};
804 uint32_t *ptr;
805 uint32_t expired;
806 amdgpu_bo_list_handle bo_list;
807 amdgpu_va_handle va_handle;
808 int r, i = 0;
809
810 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
811 CU_ASSERT_EQUAL(r, 0);
812
813 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
814 AMDGPU_GEM_DOMAIN_GTT, 0,
815 &ib_result_handle, &ib_result_cpu,
816 &ib_result_mc_address, &va_handle);
817 CU_ASSERT_EQUAL(r, 0);
818
819 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
820 &bo_list);
821 CU_ASSERT_EQUAL(r, 0);
822
823 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
824
825 /* IT_SET_CE_DE_COUNTERS */
826 ptr = ib_result_cpu;
827 if (family_id != AMDGPU_FAMILY_SI) {
828 ptr[i++] = 0xc0008900;
829 ptr[i++] = 0;
830 }
831 ptr[i++] = 0xc0008400;
832 ptr[i++] = 1;
833 ib_info[0].ib_mc_address = ib_result_mc_address;
834 ib_info[0].size = i;
835 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
836
837 ptr = (uint32_t *)ib_result_cpu + 4;
838 ptr[0] = 0xc0008600;
839 ptr[1] = 0x00000001;
840 ib_info[1].ib_mc_address = ib_result_mc_address + 16;
841 ib_info[1].size = 2;
842
843 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
844 ibs_request.number_of_ibs = 2;
845 ibs_request.ibs = ib_info;
846 ibs_request.resources = bo_list;
847 ibs_request.fence_info.handle = NULL;
848
849 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
850
851 CU_ASSERT_EQUAL(r, 0);
852
853 fence_status.context = context_handle;
854 fence_status.ip_type = AMDGPU_HW_IP_GFX;
855 fence_status.ip_instance = 0;
856 fence_status.fence = ibs_request.seq_no;
857
858 r = amdgpu_cs_query_fence_status(&fence_status,
859 AMDGPU_TIMEOUT_INFINITE,
860 0, &expired);
861 CU_ASSERT_EQUAL(r, 0);
862
863 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
864 ib_result_mc_address, 4096);
865 CU_ASSERT_EQUAL(r, 0);
866
867 r = amdgpu_bo_list_destroy(bo_list);
868 CU_ASSERT_EQUAL(r, 0);
869
870 r = amdgpu_cs_ctx_free(context_handle);
871 CU_ASSERT_EQUAL(r, 0);
872 }
873
874 static void amdgpu_command_submission_gfx_cp_write_data(void)
875 {
876 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
877 }
878
879 static void amdgpu_command_submission_gfx_cp_const_fill(void)
880 {
881 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
882 }
883
884 static void amdgpu_command_submission_gfx_cp_copy_data(void)
885 {
886 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
887 }
888
889 static void amdgpu_bo_eviction_test(void)
890 {
891 const int sdma_write_length = 1024;
892 const int pm4_dw = 256;
893 amdgpu_context_handle context_handle;
894 amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
895 amdgpu_bo_handle *resources;
896 uint32_t *pm4;
897 struct amdgpu_cs_ib_info *ib_info;
898 struct amdgpu_cs_request *ibs_request;
899 uint64_t bo1_mc, bo2_mc;
900 volatile unsigned char *bo1_cpu, *bo2_cpu;
901 int i, j, r, loop1, loop2;
902 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
903 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
904 struct amdgpu_heap_info vram_info, gtt_info;
905
906 pm4 = calloc(pm4_dw, sizeof(*pm4));
907 CU_ASSERT_NOT_EQUAL(pm4, NULL);
908
909 ib_info = calloc(1, sizeof(*ib_info));
910 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
911
912 ibs_request = calloc(1, sizeof(*ibs_request));
913 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
914
915 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
916 CU_ASSERT_EQUAL(r, 0);
917
918 /* prepare resource */
919 resources = calloc(4, sizeof(amdgpu_bo_handle));
920 CU_ASSERT_NOT_EQUAL(resources, NULL);
921
922 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
923 0, &vram_info);
924 CU_ASSERT_EQUAL(r, 0);
925
926 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
927 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
928 CU_ASSERT_EQUAL(r, 0);
929 r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
930 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
931 CU_ASSERT_EQUAL(r, 0);
932
933 r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
934 0, &gtt_info);
935 CU_ASSERT_EQUAL(r, 0);
936
937 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
938 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
939 CU_ASSERT_EQUAL(r, 0);
940 r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
941 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
942 CU_ASSERT_EQUAL(r, 0);
943
944
945
946 loop1 = loop2 = 0;
947 /* loop over all four GTT mapping-flag combinations */
948 while(loop1 < 2) {
949 while(loop2 < 2) {
950 /* allocate UC bo1 for sDMA use */
951 r = amdgpu_bo_alloc_and_map(device_handle,
952 sdma_write_length, 4096,
953 AMDGPU_GEM_DOMAIN_GTT,
954 gtt_flags[loop1], &bo1,
955 (void**)&bo1_cpu, &bo1_mc,
956 &bo1_va_handle);
957 CU_ASSERT_EQUAL(r, 0);
958
959 /* set bo1 */
960 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
961
962 /* allocate UC bo2 for sDMA use */
963 r = amdgpu_bo_alloc_and_map(device_handle,
964 sdma_write_length, 4096,
965 AMDGPU_GEM_DOMAIN_GTT,
966 gtt_flags[loop2], &bo2,
967 (void**)&bo2_cpu, &bo2_mc,
968 &bo2_va_handle);
969 CU_ASSERT_EQUAL(r, 0);
970
971 /* clear bo2 */
972 memset((void*)bo2_cpu, 0, sdma_write_length);
973
974 resources[0] = bo1;
975 resources[1] = bo2;
976 resources[2] = vram_max[loop2];
977 resources[3] = gtt_max[loop2];
978
979 /* fill PM4: test DMA copy linear */
980 i = j = 0;
981 if (family_id == AMDGPU_FAMILY_SI) {
982 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
983 sdma_write_length);
984 pm4[i++] = 0xffffffff & bo2_mc;
985 pm4[i++] = 0xffffffff & bo1_mc;
986 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
987 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
988 } else {
989 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
990 if (family_id >= AMDGPU_FAMILY_AI)
991 pm4[i++] = sdma_write_length - 1;
992 else
993 pm4[i++] = sdma_write_length;
994 pm4[i++] = 0;
995 pm4[i++] = 0xffffffff & bo1_mc;
996 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
997 pm4[i++] = 0xffffffff & bo2_mc;
998 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
999 }
1000
1001 amdgpu_test_exec_cs_helper(context_handle,
1002 AMDGPU_HW_IP_DMA, 0,
1003 i, pm4,
1004 4, resources,
1005 ib_info, ibs_request);
1006
1007 /* verify that the SDMA test result matches the expected data */
1008 i = 0;
1009 while(i < sdma_write_length) {
1010 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1011 }
1012 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1013 sdma_write_length);
1014 CU_ASSERT_EQUAL(r, 0);
1015 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1016 sdma_write_length);
1017 CU_ASSERT_EQUAL(r, 0);
1018 loop2++;
1019 }
1020 loop2 = 0;
1021 loop1++;
1022 }
1023 amdgpu_bo_free(vram_max[0]);
1024 amdgpu_bo_free(vram_max[1]);
1025 amdgpu_bo_free(gtt_max[0]);
1026 amdgpu_bo_free(gtt_max[1]);
1027 /* clean resources */
1028 free(resources);
1029 free(ibs_request);
1030 free(ib_info);
1031 free(pm4);
1032
1033 /* end of test */
1034 r = amdgpu_cs_ctx_free(context_handle);
1035 CU_ASSERT_EQUAL(r, 0);
1036 }
1037
1038
1039 static void amdgpu_command_submission_gfx(void)
1040 {
1041 /* write data using the CP */
1042 amdgpu_command_submission_gfx_cp_write_data();
1043 /* const fill using the CP */
1044 amdgpu_command_submission_gfx_cp_const_fill();
1045 /* copy data using the CP */
1046 amdgpu_command_submission_gfx_cp_copy_data();
1047 /* separate IB buffers for multi-IB submission */
1048 amdgpu_command_submission_gfx_separate_ibs();
1049 /* shared IB buffer for multi-IB submission */
1050 amdgpu_command_submission_gfx_shared_ib();
1051 }
1052
1053 static void amdgpu_semaphore_test(void)
1054 {
1055 amdgpu_context_handle context_handle[2];
1056 amdgpu_semaphore_handle sem;
1057 amdgpu_bo_handle ib_result_handle[2];
1058 void *ib_result_cpu[2];
1059 uint64_t ib_result_mc_address[2];
1060 struct amdgpu_cs_request ibs_request[2] = {0};
1061 struct amdgpu_cs_ib_info ib_info[2] = {0};
1062 struct amdgpu_cs_fence fence_status = {0};
1063 uint32_t *ptr;
1064 uint32_t expired;
1065 uint32_t sdma_nop, gfx_nop;
1066 amdgpu_bo_list_handle bo_list[2];
1067 amdgpu_va_handle va_handle[2];
1068 int r, i;
1069
1070 if (family_id == AMDGPU_FAMILY_SI) {
1071 sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1072 gfx_nop = GFX_COMPUTE_NOP_SI;
1073 } else {
1074 sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1075 gfx_nop = GFX_COMPUTE_NOP;
1076 }
1077
1078 r = amdgpu_cs_create_semaphore(&sem);
1079 CU_ASSERT_EQUAL(r, 0);
1080 for (i = 0; i < 2; i++) {
1081 r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1082 CU_ASSERT_EQUAL(r, 0);
1083
1084 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1085 AMDGPU_GEM_DOMAIN_GTT, 0,
1086 &ib_result_handle[i], &ib_result_cpu[i],
1087 &ib_result_mc_address[i], &va_handle[i]);
1088 CU_ASSERT_EQUAL(r, 0);
1089
1090 r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1091 NULL, &bo_list[i]);
1092 CU_ASSERT_EQUAL(r, 0);
1093 }
1094
1095 /* 1. same context different engine */
1096 ptr = ib_result_cpu[0];
1097 ptr[0] = sdma_nop;
1098 ib_info[0].ib_mc_address = ib_result_mc_address[0];
1099 ib_info[0].size = 1;
1100
1101 ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1102 ibs_request[0].number_of_ibs = 1;
1103 ibs_request[0].ibs = &ib_info[0];
1104 ibs_request[0].resources = bo_list[0];
1105 ibs_request[0].fence_info.handle = NULL;
1106 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1107 CU_ASSERT_EQUAL(r, 0);
1108 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1109 CU_ASSERT_EQUAL(r, 0);
1110
1111 r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1112 CU_ASSERT_EQUAL(r, 0);
1113 ptr = ib_result_cpu[1];
1114 ptr[0] = gfx_nop;
1115 ib_info[1].ib_mc_address = ib_result_mc_address[1];
1116 ib_info[1].size = 1;
1117
1118 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1119 ibs_request[1].number_of_ibs = 1;
1120 ibs_request[1].ibs = &ib_info[1];
1121 ibs_request[1].resources = bo_list[1];
1122 ibs_request[1].fence_info.handle = NULL;
1123
1124 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1125 CU_ASSERT_EQUAL(r, 0);
1126
1127 fence_status.context = context_handle[0];
1128 fence_status.ip_type = AMDGPU_HW_IP_GFX;
1129 fence_status.ip_instance = 0;
1130 fence_status.fence = ibs_request[1].seq_no;
1131 r = amdgpu_cs_query_fence_status(&fence_status,
1132 500000000, 0, &expired);
1133 CU_ASSERT_EQUAL(r, 0);
1134 CU_ASSERT_EQUAL(expired, true);
1135
1136 /* 2. same engine different context */
1137 ptr = ib_result_cpu[0];
1138 ptr[0] = gfx_nop;
1139 ib_info[0].ib_mc_address = ib_result_mc_address[0];
1140 ib_info[0].size = 1;
1141
1142 ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
1143 ibs_request[0].number_of_ibs = 1;
1144 ibs_request[0].ibs = &ib_info[0];
1145 ibs_request[0].resources = bo_list[0];
1146 ibs_request[0].fence_info.handle = NULL;
1147 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1148 CU_ASSERT_EQUAL(r, 0);
1149 r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
1150 CU_ASSERT_EQUAL(r, 0);
1151
1152 r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
1153 CU_ASSERT_EQUAL(r, 0);
1154 ptr = ib_result_cpu[1];
1155 ptr[0] = gfx_nop;
1156 ib_info[1].ib_mc_address = ib_result_mc_address[1];
1157 ib_info[1].size = 1;
1158
1159 ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
1160 ibs_request[1].number_of_ibs = 1;
1161 ibs_request[1].ibs = &ib_info[1];
1162 ibs_request[1].resources = bo_list[1];
1163 ibs_request[1].fence_info.handle = NULL;
1164 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1165
1166 CU_ASSERT_EQUAL(r, 0);
1167
1168 fence_status.context = context_handle[1];
1169 fence_status.ip_type = AMDGPU_HW_IP_GFX;
1170 fence_status.ip_instance = 0;
1171 fence_status.fence = ibs_request[1].seq_no;
1172 r = amdgpu_cs_query_fence_status(&fence_status,
1173 500000000, 0, &expired);
1174 CU_ASSERT_EQUAL(r, 0);
1175 CU_ASSERT_EQUAL(expired, true);
1176
1177 for (i = 0; i < 2; i++) {
1178 r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1179 ib_result_mc_address[i], 4096);
1180 CU_ASSERT_EQUAL(r, 0);
1181
1182 r = amdgpu_bo_list_destroy(bo_list[i]);
1183 CU_ASSERT_EQUAL(r, 0);
1184
1185 r = amdgpu_cs_ctx_free(context_handle[i]);
1186 CU_ASSERT_EQUAL(r, 0);
1187 }
1188
1189 r = amdgpu_cs_destroy_semaphore(sem);
1190 CU_ASSERT_EQUAL(r, 0);
1191 }
1192
1193 static void amdgpu_command_submission_compute_nop(void)
1194 {
1195 amdgpu_context_handle context_handle;
1196 amdgpu_bo_handle ib_result_handle;
1197 void *ib_result_cpu;
1198 uint64_t ib_result_mc_address;
1199 struct amdgpu_cs_request ibs_request;
1200 struct amdgpu_cs_ib_info ib_info;
1201 struct amdgpu_cs_fence fence_status;
1202 uint32_t *ptr;
1203 uint32_t expired;
1204 int r, instance;
1205 amdgpu_bo_list_handle bo_list;
1206 amdgpu_va_handle va_handle;
1207 struct drm_amdgpu_info_hw_ip info;
1208
1209 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1210 CU_ASSERT_EQUAL(r, 0);
1211
1212 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1213 CU_ASSERT_EQUAL(r, 0);
1214
1215 for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1216 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1217 AMDGPU_GEM_DOMAIN_GTT, 0,
1218 &ib_result_handle, &ib_result_cpu,
1219 &ib_result_mc_address, &va_handle);
1220 CU_ASSERT_EQUAL(r, 0);
1221
1222 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1223 &bo_list);
1224 CU_ASSERT_EQUAL(r, 0);
1225
1226 ptr = ib_result_cpu;
1227 memset(ptr, 0, 16);
1228 ptr[0]=PACKET3(PACKET3_NOP, 14);
1229
1230 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1231 ib_info.ib_mc_address = ib_result_mc_address;
1232 ib_info.size = 16;
1233
1234 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1235 ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1236 ibs_request.ring = instance;
1237 ibs_request.number_of_ibs = 1;
1238 ibs_request.ibs = &ib_info;
1239 ibs_request.resources = bo_list;
1240 ibs_request.fence_info.handle = NULL;
1241
1242 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1243 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1244 CU_ASSERT_EQUAL(r, 0);
1245
1246 fence_status.context = context_handle;
1247 fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1248 fence_status.ip_instance = 0;
1249 fence_status.ring = instance;
1250 fence_status.fence = ibs_request.seq_no;
1251
1252 r = amdgpu_cs_query_fence_status(&fence_status,
1253 AMDGPU_TIMEOUT_INFINITE,
1254 0, &expired);
1255 CU_ASSERT_EQUAL(r, 0);
1256
1257 r = amdgpu_bo_list_destroy(bo_list);
1258 CU_ASSERT_EQUAL(r, 0);
1259
1260 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1261 ib_result_mc_address, 4096);
1262 CU_ASSERT_EQUAL(r, 0);
1263 }
1264
1265 r = amdgpu_cs_ctx_free(context_handle);
1266 CU_ASSERT_EQUAL(r, 0);
1267 }
1268
1269 static void amdgpu_command_submission_compute_cp_write_data(void)
1270 {
1271 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1272 }
1273
1274 static void amdgpu_command_submission_compute_cp_const_fill(void)
1275 {
1276 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1277 }
1278
1279 static void amdgpu_command_submission_compute_cp_copy_data(void)
1280 {
1281 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1282 }
1283
1284 static void amdgpu_command_submission_compute(void)
1285 {
1286 /* write data using the CP */
1287 amdgpu_command_submission_compute_cp_write_data();
1288 /* const fill using the CP */
1289 amdgpu_command_submission_compute_cp_const_fill();
1290 /* copy data using the CP */
1291 amdgpu_command_submission_compute_cp_copy_data();
1292 /* nop test */
1293 amdgpu_command_submission_compute_nop();
1294 }
1295
1296 /*
1297 * The caller must create and release:
1298 * pm4_src, resources, ib_info, and ibs_request.
1299 * This helper submits the command stream described in ibs_request and waits for the IB to complete.
1300 */
1301 void
1302 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1303 amdgpu_context_handle context_handle,
1304 unsigned ip_type, int instance, int pm4_dw,
1305 uint32_t *pm4_src, int res_cnt,
1306 amdgpu_bo_handle *resources,
1307 struct amdgpu_cs_ib_info *ib_info,
1308 struct amdgpu_cs_request *ibs_request,
1309 bool secure)
1310 {
1311 int r;
1312 uint32_t expired;
1313 uint32_t *ring_ptr;
1314 amdgpu_bo_handle ib_result_handle;
1315 void *ib_result_cpu;
1316 uint64_t ib_result_mc_address;
1317 struct amdgpu_cs_fence fence_status = {0};
1318 amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1319 amdgpu_va_handle va_handle;
1320
1321 /* prepare CS */
1322 CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1323 CU_ASSERT_NOT_EQUAL(resources, NULL);
1324 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1325 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1326 CU_ASSERT_TRUE(pm4_dw <= 1024);
1327
1328 /* allocate IB */
1329 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1330 AMDGPU_GEM_DOMAIN_GTT, 0,
1331 &ib_result_handle, &ib_result_cpu,
1332 &ib_result_mc_address, &va_handle);
1333 CU_ASSERT_EQUAL(r, 0);
1334
1335 /* copy PM4 packet to ring from caller */
1336 ring_ptr = ib_result_cpu;
1337 memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1338
1339 ib_info->ib_mc_address = ib_result_mc_address;
1340 ib_info->size = pm4_dw;
1341 if (secure)
1342 ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1343
1344 ibs_request->ip_type = ip_type;
1345 ibs_request->ring = instance;
1346 ibs_request->number_of_ibs = 1;
1347 ibs_request->ibs = ib_info;
1348 ibs_request->fence_info.handle = NULL;
1349
1350 memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1351 all_res[res_cnt] = ib_result_handle;
1352
1353 r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1354 NULL, &ibs_request->resources);
1355 CU_ASSERT_EQUAL(r, 0);
1356
1357 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1358
1359 /* submit CS */
1360 r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1361 CU_ASSERT_EQUAL(r, 0);
1362
1363 r = amdgpu_bo_list_destroy(ibs_request->resources);
1364 CU_ASSERT_EQUAL(r, 0);
1365
1366 fence_status.ip_type = ip_type;
1367 fence_status.ip_instance = 0;
1368 fence_status.ring = ibs_request->ring;
1369 fence_status.context = context_handle;
1370 fence_status.fence = ibs_request->seq_no;
1371
1372 /* wait for the IB to complete */
1373 r = amdgpu_cs_query_fence_status(&fence_status,
1374 AMDGPU_TIMEOUT_INFINITE,
1375 0, &expired);
1376 CU_ASSERT_EQUAL(r, 0);
1377 CU_ASSERT_EQUAL(expired, true);
1378
1379 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1380 ib_result_mc_address, 4096);
1381 CU_ASSERT_EQUAL(r, 0);
1382 }
1383
1384 static void
1385 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1386 unsigned ip_type, int instance, int pm4_dw,
1387 uint32_t *pm4_src, int res_cnt,
1388 amdgpu_bo_handle *resources,
1389 struct amdgpu_cs_ib_info *ib_info,
1390 struct amdgpu_cs_request *ibs_request)
1391 {
1392 amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1393 ip_type, instance, pm4_dw, pm4_src,
1394 res_cnt, resources, ib_info,
1395 ibs_request, false);
1396 }
1397
1398 void
1399 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,
1400 unsigned ip_type,
1401 bool secure)
1402 {
1403 const int sdma_write_length = 128;
1404 const int pm4_dw = 256;
1405 amdgpu_context_handle context_handle;
1406 amdgpu_bo_handle bo;
1407 amdgpu_bo_handle *resources;
1408 uint32_t *pm4;
1409 struct amdgpu_cs_ib_info *ib_info;
1410 struct amdgpu_cs_request *ibs_request;
1411 uint64_t bo_mc;
1412 volatile uint32_t *bo_cpu;
1413 uint32_t bo_cpu_origin;
1414 int i, j, r, loop, ring_id;
1415 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1416 amdgpu_va_handle va_handle;
1417 struct drm_amdgpu_info_hw_ip hw_ip_info;
1418
1419 pm4 = calloc(pm4_dw, sizeof(*pm4));
1420 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1421
1422 ib_info = calloc(1, sizeof(*ib_info));
1423 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1424
1425 ibs_request = calloc(1, sizeof(*ibs_request));
1426 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1427
1428 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1429 CU_ASSERT_EQUAL(r, 0);
1430
1431 for (i = 0; secure && (i < 2); i++)
1432 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1433
1434 r = amdgpu_cs_ctx_create(device, &context_handle);
1435
1436 CU_ASSERT_EQUAL(r, 0);
1437
1438 /* prepare resource */
1439 resources = calloc(1, sizeof(amdgpu_bo_handle));
1440 CU_ASSERT_NOT_EQUAL(resources, NULL);
1441
1442 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1443 loop = 0;
1444 while(loop < 2) {
1445 /* allocate UC bo for sDMA use */
1446 r = amdgpu_bo_alloc_and_map(device,
1447 sdma_write_length * sizeof(uint32_t),
1448 4096, AMDGPU_GEM_DOMAIN_GTT,
1449 gtt_flags[loop], &bo, (void**)&bo_cpu,
1450 &bo_mc, &va_handle);
1451 CU_ASSERT_EQUAL(r, 0);
1452
1453 /* clear bo */
1454 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1455
1456 resources[0] = bo;
1457
1458 /* fill PM4: test DMA write-linear */
1459 i = j = 0;
1460 if (ip_type == AMDGPU_HW_IP_DMA) {
1461 if (family_id == AMDGPU_FAMILY_SI)
1462 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1463 sdma_write_length);
1464 else
1465 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1466 SDMA_WRITE_SUB_OPCODE_LINEAR,
1467 secure ? SDMA_ATOMIC_TMZ(1) : 0);
1468 pm4[i++] = 0xfffffffc & bo_mc;
1469 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1470 if (family_id >= AMDGPU_FAMILY_AI)
1471 pm4[i++] = sdma_write_length - 1;
1472 else if (family_id != AMDGPU_FAMILY_SI)
1473 pm4[i++] = sdma_write_length;
1474 while(j++ < sdma_write_length)
1475 pm4[i++] = 0xdeadbeaf;
1476 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1477 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1478 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1479 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1480 pm4[i++] = 0xfffffffc & bo_mc;
1481 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1482 while(j++ < sdma_write_length)
1483 pm4[i++] = 0xdeadbeaf;
1484 }
1485
1486 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1487 ip_type, ring_id, i, pm4,
1488 1, resources, ib_info,
1489 ibs_request, secure);
1490
1491 /* verify that the SDMA test result matches the expected data */
1492 i = 0;
1493 if (!secure) {
1494 while(i < sdma_write_length) {
1495 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1496 }
1497 } else if (ip_type == AMDGPU_HW_IP_GFX) {
1498 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1499 pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1500 /* 32-bit atomic compare-and-swap with return (TC_OP_ATOMIC_CMPSWAP_RTN_32):
1501 * command = 1 (loop_until_compare_satisfied),
1502 * cache policy = 0 (lru),
1503 * engine_sel = 0 (micro_engine)
1504 */
1505 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1506 ATOMIC_MEM_COMMAND(1) |
1507 ATOMIC_MEM_CACHEPOLICY(0) |
1508 ATOMIC_MEM_ENGINESEL(0));
1509 pm4[i++] = 0xfffffffc & bo_mc;
1510 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1511 pm4[i++] = 0x12345678;
1512 pm4[i++] = 0x0;
1513 pm4[i++] = 0xdeadbeaf;
1514 pm4[i++] = 0x0;
1515 pm4[i++] = 0x100;
1516 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1517 ip_type, ring_id, i, pm4,
1518 1, resources, ib_info,
1519 ibs_request, true);
1520 } else if (ip_type == AMDGPU_HW_IP_DMA) {
1521 /* restore the bo_cpu to compare */
1522 bo_cpu_origin = bo_cpu[0];
1523 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1524 /* SDMA 32-bit atomic compare-and-swap with return (TC_OP_ATOMIC_CMPSWAP_RTN_32):
1525 * loop = 1 (loop_until_compare_satisfied),
1526 * tmz = 1
1527 */
1528 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1529 0,
1530 SDMA_ATOMIC_LOOP(1) |
1531 SDMA_ATOMIC_TMZ(1) |
1532 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1533 pm4[i++] = 0xfffffffc & bo_mc;
1534 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1535 pm4[i++] = 0x12345678;
1536 pm4[i++] = 0x0;
1537 pm4[i++] = 0xdeadbeaf;
1538 pm4[i++] = 0x0;
1539 pm4[i++] = 0x100;
1540 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1541 ip_type, ring_id, i, pm4,
1542 1, resources, ib_info,
1543 ibs_request, true);
1544 /* SDMA's atomic behavior is unlike GFX.
1545 * If the compare data is not equal to the destination data,
1546 * GFX loops again until the GFX timeout hits (system hang), whereas
1547 * SDMA loops again until its timer expires and then sends an interrupt,
1548 * so the test case cannot use the interrupt mechanism.
1549 * We verify another way instead: when the compare data is not
1550 * equal to the destination data, the source data is written to the destination
1551 * buffer; otherwise, the original destination data is unchanged.
1552 * So if the bo_cpu data has been overwritten, the test passes.
1553 */
1554 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1555
1556 /* compare again for the case of dest_data != cmp_data */
1557 i = 0;
1558 /* restore again, here dest_data should be */
1559 bo_cpu_origin = bo_cpu[0];
1560 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1561 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1562 0,
1563 SDMA_ATOMIC_LOOP(1) |
1564 SDMA_ATOMIC_TMZ(1) |
1565 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1566 pm4[i++] = 0xfffffffc & bo_mc;
1567 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1568 pm4[i++] = 0x87654321;
1569 pm4[i++] = 0x0;
1570 pm4[i++] = 0xdeadbeaf;
1571 pm4[i++] = 0x0;
1572 pm4[i++] = 0x100;
1573 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1574 ip_type, ring_id, i, pm4,
1575 1, resources, ib_info,
1576 ibs_request, true);
1577 /* here bo_cpu[0] should be unchanged, still 0x12345678, otherwise the test failed */
1578 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1579 }
1580
1581 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1582 sdma_write_length * sizeof(uint32_t));
1583 CU_ASSERT_EQUAL(r, 0);
1584 loop++;
1585 }
1586 }
1587 /* clean resources */
1588 free(resources);
1589 free(ibs_request);
1590 free(ib_info);
1591 free(pm4);
1592
1593 /* end of test */
1594 r = amdgpu_cs_ctx_free(context_handle);
1595 CU_ASSERT_EQUAL(r, 0);
1596 }
1597
1598 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1599 {
1600 amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1601 ip_type,
1602 false);
1603 }
1604
1605 static void amdgpu_command_submission_sdma_write_linear(void)
1606 {
1607 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1608 }
1609
1610 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1611 {
1612 const int sdma_write_length = 1024 * 1024;
1613 const int pm4_dw = 256;
1614 amdgpu_context_handle context_handle;
1615 amdgpu_bo_handle bo;
1616 amdgpu_bo_handle *resources;
1617 uint32_t *pm4;
1618 struct amdgpu_cs_ib_info *ib_info;
1619 struct amdgpu_cs_request *ibs_request;
1620 uint64_t bo_mc;
1621 volatile uint32_t *bo_cpu;
1622 int i, j, r, loop, ring_id;
1623 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1624 amdgpu_va_handle va_handle;
1625 struct drm_amdgpu_info_hw_ip hw_ip_info;
1626
1627 pm4 = calloc(pm4_dw, sizeof(*pm4));
1628 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1629
1630 ib_info = calloc(1, sizeof(*ib_info));
1631 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1632
1633 ibs_request = calloc(1, sizeof(*ibs_request));
1634 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1635
1636 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1637 CU_ASSERT_EQUAL(r, 0);
1638
1639 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1640 CU_ASSERT_EQUAL(r, 0);
1641
1642 /* prepare resource */
1643 resources = calloc(1, sizeof(amdgpu_bo_handle));
1644 CU_ASSERT_NOT_EQUAL(resources, NULL);
1645
1646 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1647 loop = 0;
1648 while(loop < 2) {
1649 /* allocate UC bo for sDMA use */
1650 r = amdgpu_bo_alloc_and_map(device_handle,
1651 sdma_write_length, 4096,
1652 AMDGPU_GEM_DOMAIN_GTT,
1653 gtt_flags[loop], &bo, (void**)&bo_cpu,
1654 &bo_mc, &va_handle);
1655 CU_ASSERT_EQUAL(r, 0);
1656
1657 /* clear bo */
1658 memset((void*)bo_cpu, 0, sdma_write_length);
1659
1660 resources[0] = bo;
1661
1662 /* fill PM4: test DMA const fill */
1663 i = j = 0;
1664 if (ip_type == AMDGPU_HW_IP_DMA) {
1665 if (family_id == AMDGPU_FAMILY_SI) {
1666 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1667 0, 0, 0,
1668 sdma_write_length / 4);
1669 pm4[i++] = 0xfffffffc & bo_mc;
1670 pm4[i++] = 0xdeadbeaf;
1671 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1672 } else {
1673 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1674 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1675 pm4[i++] = 0xffffffff & bo_mc;
1676 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1677 pm4[i++] = 0xdeadbeaf;
1678 if (family_id >= AMDGPU_FAMILY_AI)
1679 pm4[i++] = sdma_write_length - 1;
1680 else
1681 pm4[i++] = sdma_write_length;
1682 }
1683 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1684 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1685 if (family_id == AMDGPU_FAMILY_SI) {
1686 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1687 pm4[i++] = 0xdeadbeaf;
1688 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1689 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1690 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1691 PACKET3_DMA_DATA_SI_CP_SYNC;
1692 pm4[i++] = 0xffffffff & bo_mc;
1693 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1694 pm4[i++] = sdma_write_length;
1695 } else {
1696 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1697 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1698 PACKET3_DMA_DATA_DST_SEL(0) |
1699 PACKET3_DMA_DATA_SRC_SEL(2) |
1700 PACKET3_DMA_DATA_CP_SYNC;
1701 pm4[i++] = 0xdeadbeaf;
1702 pm4[i++] = 0;
1703 pm4[i++] = 0xfffffffc & bo_mc;
1704 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1705 pm4[i++] = sdma_write_length;
1706 }
1707 }
1708
1709 amdgpu_test_exec_cs_helper(context_handle,
1710 ip_type, ring_id,
1711 i, pm4,
1712 1, resources,
1713 ib_info, ibs_request);
1714
1715 /* verify that the SDMA test result matches the expected value */
1716 i = 0;
1717 while(i < (sdma_write_length / 4)) {
1718 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1719 }
1720
1721 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1722 sdma_write_length);
1723 CU_ASSERT_EQUAL(r, 0);
1724 loop++;
1725 }
1726 }
1727 /* clean resources */
1728 free(resources);
1729 free(ibs_request);
1730 free(ib_info);
1731 free(pm4);
1732
1733 /* end of test */
1734 r = amdgpu_cs_ctx_free(context_handle);
1735 CU_ASSERT_EQUAL(r, 0);
1736 }
1737
1738 static void amdgpu_command_submission_sdma_const_fill(void)
1739 {
1740 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1741 }
1742
1743 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1744 {
1745 const int sdma_write_length = 1024;
1746 const int pm4_dw = 256;
1747 amdgpu_context_handle context_handle;
1748 amdgpu_bo_handle bo1, bo2;
1749 amdgpu_bo_handle *resources;
1750 uint32_t *pm4;
1751 struct amdgpu_cs_ib_info *ib_info;
1752 struct amdgpu_cs_request *ibs_request;
1753 uint64_t bo1_mc, bo2_mc;
1754 volatile unsigned char *bo1_cpu, *bo2_cpu;
1755 int i, j, r, loop1, loop2, ring_id;
1756 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1757 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1758 struct drm_amdgpu_info_hw_ip hw_ip_info;
1759
1760 pm4 = calloc(pm4_dw, sizeof(*pm4));
1761 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1762
1763 ib_info = calloc(1, sizeof(*ib_info));
1764 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1765
1766 ibs_request = calloc(1, sizeof(*ibs_request));
1767 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1768
1769 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1770 CU_ASSERT_EQUAL(r, 0);
1771
1772 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1773 CU_ASSERT_EQUAL(r, 0);
1774
1775 /* prepare resource */
1776 resources = calloc(2, sizeof(amdgpu_bo_handle));
1777 CU_ASSERT_NOT_EQUAL(resources, NULL);
1778
1779 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1780 loop1 = loop2 = 0;
1781 /* loop over the GTT mapping-flag combinations for bo1 and bo2 */
1782 while(loop1 < 2) {
1783 while(loop2 < 2) {
1784 /* allocate UC bo1 for sDMA use */
1785 r = amdgpu_bo_alloc_and_map(device_handle,
1786 sdma_write_length, 4096,
1787 AMDGPU_GEM_DOMAIN_GTT,
1788 gtt_flags[loop1], &bo1,
1789 (void**)&bo1_cpu, &bo1_mc,
1790 &bo1_va_handle);
1791 CU_ASSERT_EQUAL(r, 0);
1792
1793 /* set bo1 */
1794 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1795
1796 /* allocate UC bo2 for sDMA use */
1797 r = amdgpu_bo_alloc_and_map(device_handle,
1798 sdma_write_length, 4096,
1799 AMDGPU_GEM_DOMAIN_GTT,
1800 gtt_flags[loop2], &bo2,
1801 (void**)&bo2_cpu, &bo2_mc,
1802 &bo2_va_handle);
1803 CU_ASSERT_EQUAL(r, 0);
1804
1805 /* clear bo2 */
1806 memset((void*)bo2_cpu, 0, sdma_write_length);
1807
1808 resources[0] = bo1;
1809 resources[1] = bo2;
1810
1811 /* fill in the PM4 packets: test DMA copy linear */
1812 i = j = 0;
1813 if (ip_type == AMDGPU_HW_IP_DMA) {
1814 if (family_id == AMDGPU_FAMILY_SI) {
1815 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1816 0, 0, 0,
1817 sdma_write_length);
1818 pm4[i++] = 0xffffffff & bo2_mc;
1819 pm4[i++] = 0xffffffff & bo1_mc;
1820 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1821 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1822 } else {
1823 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1824 SDMA_COPY_SUB_OPCODE_LINEAR,
1825 0);
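 					/* gfx9 (FAMILY_AI) and newer encode the SDMA copy byte count as size - 1 */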
1826 if (family_id >= AMDGPU_FAMILY_AI)
1827 pm4[i++] = sdma_write_length - 1;
1828 else
1829 pm4[i++] = sdma_write_length;
1830 pm4[i++] = 0;
1831 pm4[i++] = 0xffffffff & bo1_mc;
1832 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1833 pm4[i++] = 0xffffffff & bo2_mc;
1834 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1835 }
1836 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1837 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1838 if (family_id == AMDGPU_FAMILY_SI) {
1839 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1840 pm4[i++] = 0xfffffffc & bo1_mc;
1841 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1842 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1843 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1844 PACKET3_DMA_DATA_SI_CP_SYNC |
1845 (0xffff00000000 & bo1_mc) >> 32;
1846 pm4[i++] = 0xfffffffc & bo2_mc;
1847 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1848 pm4[i++] = sdma_write_length;
1849 } else {
1850 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1851 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1852 PACKET3_DMA_DATA_DST_SEL(0) |
1853 PACKET3_DMA_DATA_SRC_SEL(0) |
1854 PACKET3_DMA_DATA_CP_SYNC;
1855 pm4[i++] = 0xfffffffc & bo1_mc;
1856 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1857 pm4[i++] = 0xfffffffc & bo2_mc;
1858 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1859 pm4[i++] = sdma_write_length;
1860 }
1861 }
1862
1863 amdgpu_test_exec_cs_helper(context_handle,
1864 ip_type, ring_id,
1865 i, pm4,
1866 2, resources,
1867 ib_info, ibs_request);
1868
1869 /* verify that the SDMA test result matches the expected value */
1870 i = 0;
1871 while(i < sdma_write_length) {
1872 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1873 }
1874 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1875 sdma_write_length);
1876 CU_ASSERT_EQUAL(r, 0);
1877 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1878 sdma_write_length);
1879 CU_ASSERT_EQUAL(r, 0);
1880 loop2++;
1881 }
1882 loop1++;
1883 }
1884 }
1885 /* clean resources */
1886 free(resources);
1887 free(ibs_request);
1888 free(ib_info);
1889 free(pm4);
1890
1891 /* end of test */
1892 r = amdgpu_cs_ctx_free(context_handle);
1893 CU_ASSERT_EQUAL(r, 0);
1894 }
1895
1896 static void amdgpu_command_submission_sdma_copy_linear(void)
1897 {
1898 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1899 }
1900
1901 static void amdgpu_command_submission_sdma(void)
1902 {
1903 amdgpu_command_submission_sdma_write_linear();
1904 amdgpu_command_submission_sdma_const_fill();
1905 amdgpu_command_submission_sdma_copy_linear();
1906 }
1907
1908 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1909 {
1910 amdgpu_context_handle context_handle;
1911 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1912 void *ib_result_cpu, *ib_result_ce_cpu;
1913 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1914 struct amdgpu_cs_request ibs_request[2] = {0};
1915 struct amdgpu_cs_ib_info ib_info[2];
1916 struct amdgpu_cs_fence fence_status[2] = {0};
1917 uint32_t *ptr;
1918 uint32_t expired;
1919 amdgpu_bo_list_handle bo_list;
1920 amdgpu_va_handle va_handle, va_handle_ce;
1921 int r;
1922 int i = 0, ib_cs_num = 2;
1923
1924 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1925 CU_ASSERT_EQUAL(r, 0);
1926
1927 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1928 AMDGPU_GEM_DOMAIN_GTT, 0,
1929 &ib_result_handle, &ib_result_cpu,
1930 &ib_result_mc_address, &va_handle);
1931 CU_ASSERT_EQUAL(r, 0);
1932
1933 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1934 AMDGPU_GEM_DOMAIN_GTT, 0,
1935 &ib_result_ce_handle, &ib_result_ce_cpu,
1936 &ib_result_ce_mc_address, &va_handle_ce);
1937 CU_ASSERT_EQUAL(r, 0);
1938
1939 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1940 ib_result_ce_handle, &bo_list);
1941 CU_ASSERT_EQUAL(r, 0);
1942
1943 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1944
1945 /* IT_SET_CE_DE_COUNTERS */
1946 ptr = ib_result_ce_cpu;
1947 if (family_id != AMDGPU_FAMILY_SI) {
1948 ptr[i++] = 0xc0008900;
1949 ptr[i++] = 0;
1950 }
1951 ptr[i++] = 0xc0008400;
1952 ptr[i++] = 1;
1953 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1954 ib_info[0].size = i;
1955 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1956
1957 /* IT_WAIT_ON_CE_COUNTER */
1958 ptr = ib_result_cpu;
1959 ptr[0] = 0xc0008600;
1960 ptr[1] = 0x00000001;
1961 ib_info[1].ib_mc_address = ib_result_mc_address;
1962 ib_info[1].size = 2;
1963
1964 for (i = 0; i < ib_cs_num; i++) {
1965 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1966 ibs_request[i].number_of_ibs = 2;
1967 ibs_request[i].ibs = ib_info;
1968 ibs_request[i].resources = bo_list;
1969 ibs_request[i].fence_info.handle = NULL;
1970 }
1971
1972 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1973
1974 CU_ASSERT_EQUAL(r, 0);
1975
1976 for (i = 0; i < ib_cs_num; i++) {
1977 fence_status[i].context = context_handle;
1978 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1979 fence_status[i].fence = ibs_request[i].seq_no;
1980 }
1981
1982 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1983 AMDGPU_TIMEOUT_INFINITE,
1984 &expired, NULL);
1985 CU_ASSERT_EQUAL(r, 0);
1986
1987 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1988 ib_result_mc_address, 4096);
1989 CU_ASSERT_EQUAL(r, 0);
1990
1991 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1992 ib_result_ce_mc_address, 4096);
1993 CU_ASSERT_EQUAL(r, 0);
1994
1995 r = amdgpu_bo_list_destroy(bo_list);
1996 CU_ASSERT_EQUAL(r, 0);
1997
1998 r = amdgpu_cs_ctx_free(context_handle);
1999 CU_ASSERT_EQUAL(r, 0);
2000 }
2001
2002 static void amdgpu_command_submission_multi_fence(void)
2003 {
2004 amdgpu_command_submission_multi_fence_wait_all(true);
2005 amdgpu_command_submission_multi_fence_wait_all(false);
2006 }
2007
2008 static void amdgpu_userptr_test(void)
2009 {
2010 int i, r, j;
2011 uint32_t *pm4 = NULL;
2012 uint64_t bo_mc;
2013 void *ptr = NULL;
2014 int pm4_dw = 256;
2015 int sdma_write_length = 4;
2016 amdgpu_bo_handle handle;
2017 amdgpu_context_handle context_handle;
2018 struct amdgpu_cs_ib_info *ib_info;
2019 struct amdgpu_cs_request *ibs_request;
2020 amdgpu_bo_handle buf_handle;
2021 amdgpu_va_handle va_handle;
2022
2023 pm4 = calloc(pm4_dw, sizeof(*pm4));
2024 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2025
2026 ib_info = calloc(1, sizeof(*ib_info));
2027 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2028
2029 ibs_request = calloc(1, sizeof(*ibs_request));
2030 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2031
2032 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2033 CU_ASSERT_EQUAL(r, 0);
2034
2035 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2036 CU_ASSERT_NOT_EQUAL(ptr, NULL);
2037 memset(ptr, 0, BUFFER_SIZE);
2038
2039 r = amdgpu_create_bo_from_user_mem(device_handle,
2040 ptr, BUFFER_SIZE, &buf_handle);
2041 CU_ASSERT_EQUAL(r, 0);
2042
2043 r = amdgpu_va_range_alloc(device_handle,
2044 amdgpu_gpu_va_range_general,
2045 BUFFER_SIZE, 1, 0, &bo_mc,
2046 &va_handle, 0);
2047 CU_ASSERT_EQUAL(r, 0);
2048
2049 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2050 CU_ASSERT_EQUAL(r, 0);
2051
2052 handle = buf_handle;
2053
2054 j = i = 0;
2055
2056 if (family_id == AMDGPU_FAMILY_SI)
2057 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2058 sdma_write_length);
2059 else
2060 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2061 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2062 pm4[i++] = 0xffffffff & bo_mc;
2063 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2064 if (family_id >= AMDGPU_FAMILY_AI)
2065 pm4[i++] = sdma_write_length - 1;
2066 else if (family_id != AMDGPU_FAMILY_SI)
2067 pm4[i++] = sdma_write_length;
2068
2069 while (j++ < sdma_write_length)
2070 pm4[i++] = 0xdeadbeaf;
2071
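 	/* fork a child that scribbles on the PM4 buffer and exits immediately;
 	 * copy-on-write should keep the parent's copy intact, so the submission
 	 * below is expected to be unaffected */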
2072 if (!fork()) {
2073 pm4[0] = 0x0;
2074 exit(0);
2075 }
2076
2077 amdgpu_test_exec_cs_helper(context_handle,
2078 AMDGPU_HW_IP_DMA, 0,
2079 i, pm4,
2080 1, &handle,
2081 ib_info, ibs_request);
2082 i = 0;
2083 while (i < sdma_write_length) {
2084 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2085 }
2086 free(ibs_request);
2087 free(ib_info);
2088 free(pm4);
2089
2090 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2091 CU_ASSERT_EQUAL(r, 0);
2092 r = amdgpu_va_range_free(va_handle);
2093 CU_ASSERT_EQUAL(r, 0);
2094 r = amdgpu_bo_free(buf_handle);
2095 CU_ASSERT_EQUAL(r, 0);
2096 free(ptr);
2097
2098 r = amdgpu_cs_ctx_free(context_handle);
2099 CU_ASSERT_EQUAL(r, 0);
2100
2101 wait(NULL);
2102 }
2103
2104 static void amdgpu_sync_dependency_test(void)
2105 {
2106 amdgpu_context_handle context_handle[2];
2107 amdgpu_bo_handle ib_result_handle;
2108 void *ib_result_cpu;
2109 uint64_t ib_result_mc_address;
2110 struct amdgpu_cs_request ibs_request;
2111 struct amdgpu_cs_ib_info ib_info;
2112 struct amdgpu_cs_fence fence_status;
2113 uint32_t expired;
2114 int i, j, r;
2115 amdgpu_bo_list_handle bo_list;
2116 amdgpu_va_handle va_handle;
2117 static uint32_t *ptr;
2118 uint64_t seq_no;
2119
2120 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2121 CU_ASSERT_EQUAL(r, 0);
2122 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2123 CU_ASSERT_EQUAL(r, 0);
2124
2125 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2126 AMDGPU_GEM_DOMAIN_GTT, 0,
2127 &ib_result_handle, &ib_result_cpu,
2128 &ib_result_mc_address, &va_handle);
2129 CU_ASSERT_EQUAL(r, 0);
2130
2131 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2132 &bo_list);
2133 CU_ASSERT_EQUAL(r, 0);
2134
2135 ptr = ib_result_cpu;
2136 i = 0;
2137
2138 	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
2139
2140 /* Dispatch minimal init config and verify it's executed */
2141 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2142 ptr[i++] = 0x80000000;
2143 ptr[i++] = 0x80000000;
2144
2145 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2146 ptr[i++] = 0x80000000;
2147
2148
2149 /* Program compute regs */
2150 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2151 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2152 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2153 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2154
2155
2156 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2157 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2158 /*
2159 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2160 SGPRS = 1
2161 PRIORITY = 0
2162 FLOAT_MODE = 192 (0xc0)
2163 PRIV = 0
2164 DX10_CLAMP = 1
2165 DEBUG_MODE = 0
2166 IEEE_MODE = 0
2167 BULKY = 0
2168 CDBG_USER = 0
2169 *
2170 */
2171 ptr[i++] = 0x002c0040;
2172
2173
2174 /*
2175 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2176 USER_SGPR = 8
2177 TRAP_PRESENT = 0
2178 TGID_X_EN = 0
2179 TGID_Y_EN = 0
2180 TGID_Z_EN = 0
2181 TG_SIZE_EN = 0
2182 TIDIG_COMP_CNT = 0
2183 EXCP_EN_MSB = 0
2184 LDS_SIZE = 0
2185 EXCP_EN = 0
2186 *
2187 */
2188 ptr[i++] = 0x00000010;
2189
2190
2191 /*
2192 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2193 WAVESIZE = 0
2194 *
2195 */
2196 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2197 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2198 ptr[i++] = 0x00000100;
2199
2200 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2201 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2202 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2203 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2204
2205 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2206 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2207 ptr[i++] = 0;
2208
2209 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2210 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2211 ptr[i++] = 1;
2212 ptr[i++] = 1;
2213 ptr[i++] = 1;
2214
2215
2216 /* Dispatch */
2217 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2218 ptr[i++] = 1;
2219 ptr[i++] = 1;
2220 ptr[i++] = 1;
2221 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2222
2223
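 	/* pad the IB to an 8-dword boundary with type-3 NOP packets */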
2224 while (i & 7)
2225 ptr[i++] = 0xffff1000; /* type3 nop packet */
2226
2227 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2228 ib_info.ib_mc_address = ib_result_mc_address;
2229 ib_info.size = i;
2230
2231 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2232 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2233 ibs_request.ring = 0;
2234 ibs_request.number_of_ibs = 1;
2235 ibs_request.ibs = &ib_info;
2236 ibs_request.resources = bo_list;
2237 ibs_request.fence_info.handle = NULL;
2238
2239 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2240 CU_ASSERT_EQUAL(r, 0);
2241 seq_no = ibs_request.seq_no;
2242
2243
2244
2245 /* Prepare second command with dependency on the first */
2246 j = i;
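 	/* j marks the dword offset at which the second IB starts within the same BO */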
2247 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2248 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2249 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2250 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2251 ptr[i++] = 99;
2252
2253 while (i & 7)
2254 ptr[i++] = 0xffff1000; /* type3 nop packet */
2255
2256 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2257 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2258 ib_info.size = i - j;
2259
2260 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2261 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2262 ibs_request.ring = 0;
2263 ibs_request.number_of_ibs = 1;
2264 ibs_request.ibs = &ib_info;
2265 ibs_request.resources = bo_list;
2266 ibs_request.fence_info.handle = NULL;
2267
2268 ibs_request.number_of_dependencies = 1;
2269
2270 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2271 ibs_request.dependencies[0].context = context_handle[1];
2272 ibs_request.dependencies[0].ip_instance = 0;
2273 ibs_request.dependencies[0].ring = 0;
2274 ibs_request.dependencies[0].fence = seq_no;
2275
2276
2277 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2278 CU_ASSERT_EQUAL(r, 0);
2279
2280
2281 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2282 fence_status.context = context_handle[0];
2283 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2284 fence_status.ip_instance = 0;
2285 fence_status.ring = 0;
2286 fence_status.fence = ibs_request.seq_no;
2287
2288 r = amdgpu_cs_query_fence_status(&fence_status,
2289 			AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2290 CU_ASSERT_EQUAL(r, 0);
2291
2292 	/* Expect the second submission to have waited for the shader to complete */
2293 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2294
2295 r = amdgpu_bo_list_destroy(bo_list);
2296 CU_ASSERT_EQUAL(r, 0);
2297
2298 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2299 ib_result_mc_address, 4096);
2300 CU_ASSERT_EQUAL(r, 0);
2301
2302 r = amdgpu_cs_ctx_free(context_handle[0]);
2303 CU_ASSERT_EQUAL(r, 0);
2304 r = amdgpu_cs_ctx_free(context_handle[1]);
2305 CU_ASSERT_EQUAL(r, 0);
2306
2307 free(ibs_request.dependencies);
2308 }
2309
2310 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2311 {
2312 struct amdgpu_test_shader *shader;
2313 int i, loop = 0x10000;
2314
2315 switch (family) {
2316 case AMDGPU_FAMILY_AI:
2317 shader = &memcpy_cs_hang_slow_ai;
2318 break;
2319 case AMDGPU_FAMILY_RV:
2320 shader = &memcpy_cs_hang_slow_rv;
2321 break;
2322 default:
2323 return -1;
2324 break;
2325 }
2326
2327 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2328
2329 for (i = 0; i < loop; i++)
2330 memcpy(ptr + shader->header_length + shader->body_length * i,
2331 shader->shader + shader->header_length,
2332 shader->body_length * sizeof(uint32_t));
2333
2334 memcpy(ptr + shader->header_length + shader->body_length * loop,
2335 shader->shader + shader->header_length + shader->body_length,
2336 shader->foot_length * sizeof(uint32_t));
2337
2338 return 0;
2339 }
2340
2341 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2342 int cs_type)
2343 {
2344 uint32_t shader_size;
2345 const uint32_t *shader;
2346
2347 switch (cs_type) {
2348 case CS_BUFFERCLEAR:
2349 shader = bufferclear_cs_shader_gfx9;
2350 shader_size = sizeof(bufferclear_cs_shader_gfx9);
2351 break;
2352 case CS_BUFFERCOPY:
2353 shader = buffercopy_cs_shader_gfx9;
2354 shader_size = sizeof(buffercopy_cs_shader_gfx9);
2355 break;
2356 case CS_HANG:
2357 shader = memcpy_ps_hang;
2358 shader_size = sizeof(memcpy_ps_hang);
2359 break;
2360 default:
2361 return -1;
2362 break;
2363 }
2364
2365 memcpy(ptr, shader, shader_size);
2366 return 0;
2367 }
2368
2369 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2370 {
2371 int i = 0;
2372
2373 /* Write context control and load shadowing register if necessary */
2374 if (ip_type == AMDGPU_HW_IP_GFX) {
2375 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2376 ptr[i++] = 0x80000000;
2377 ptr[i++] = 0x80000000;
2378 }
2379
2380 /* Issue commands to set default compute state. */
2381 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2382 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2383 ptr[i++] = 0x204;
2384 i += 3;
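 	/* leave three zero dwords (the command buffer was pre-cleared) as the COMPUTE_START_X/Y/Z values */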
2385
2386 /* clear mmCOMPUTE_TMPRING_SIZE */
2387 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2388 ptr[i++] = 0x218;
2389 ptr[i++] = 0;
2390
2391 return i;
2392 }
2393
2394 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2395 {
2396 int i = 0;
2397
2398 /* Issue commands to set cu mask used in current dispatch */
2399 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2400 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2401 ptr[i++] = 0x216;
2402 ptr[i++] = 0xffffffff;
2403 ptr[i++] = 0xffffffff;
2404 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2405 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2406 ptr[i++] = 0x219;
2407 ptr[i++] = 0xffffffff;
2408 ptr[i++] = 0xffffffff;
2409
2410 return i;
2411 }
2412
2413 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2414 {
2415 int i, j;
2416
2417 i = 0;
2418
2419 /* Writes shader state to HW */
2420 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2421 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2422 ptr[i++] = 0x20c;
2423 ptr[i++] = (shader_addr >> 8);
2424 ptr[i++] = (shader_addr >> 40);
2425 /* write sh regs*/
2426 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2427 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2428 /* - Gfx9ShRegBase */
2429 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2430 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2431 }
2432
2433 return i;
2434 }
2435
2436 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2437 uint32_t ip_type,
2438 uint32_t ring)
2439 {
2440 amdgpu_context_handle context_handle;
2441 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2442 volatile unsigned char *ptr_dst;
2443 void *ptr_shader;
2444 uint32_t *ptr_cmd;
2445 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2446 amdgpu_va_handle va_dst, va_shader, va_cmd;
2447 int i, r;
2448 int bo_dst_size = 16384;
2449 int bo_shader_size = 4096;
2450 int bo_cmd_size = 4096;
2451 struct amdgpu_cs_request ibs_request = {0};
2452 	struct amdgpu_cs_ib_info ib_info = {0};
2453 amdgpu_bo_list_handle bo_list;
2454 struct amdgpu_cs_fence fence_status = {0};
2455 uint32_t expired;
2456
2457 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2458 CU_ASSERT_EQUAL(r, 0);
2459
2460 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2461 AMDGPU_GEM_DOMAIN_GTT, 0,
2462 &bo_cmd, (void **)&ptr_cmd,
2463 &mc_address_cmd, &va_cmd);
2464 CU_ASSERT_EQUAL(r, 0);
2465 memset(ptr_cmd, 0, bo_cmd_size);
2466
2467 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2468 AMDGPU_GEM_DOMAIN_VRAM, 0,
2469 &bo_shader, &ptr_shader,
2470 &mc_address_shader, &va_shader);
2471 CU_ASSERT_EQUAL(r, 0);
2472 memset(ptr_shader, 0, bo_shader_size);
2473
2474 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2475 CU_ASSERT_EQUAL(r, 0);
2476
2477 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2478 AMDGPU_GEM_DOMAIN_VRAM, 0,
2479 &bo_dst, (void **)&ptr_dst,
2480 &mc_address_dst, &va_dst);
2481 CU_ASSERT_EQUAL(r, 0);
2482
2483 i = 0;
2484 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2485
2486 /* Issue commands to set cu mask used in current dispatch */
2487 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2488
2489 /* Writes shader state to HW */
2490 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2491
2492 /* Write constant data */
2493 /* Writes the UAV constant data to the SGPRs. */
2494 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2495 ptr_cmd[i++] = 0x240;
2496 ptr_cmd[i++] = mc_address_dst;
2497 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2498 ptr_cmd[i++] = 0x400;
2499 ptr_cmd[i++] = 0x74fac;
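 	/* the four dwords above appear to form the buffer resource descriptor (V#):
 	 * base address low, base high plus stride, num_records (0x400) and the
 	 * dst_sel/format word (0x74fac) */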
2500
2501 /* Sets a range of pixel shader constants */
2502 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2503 ptr_cmd[i++] = 0x244;
2504 ptr_cmd[i++] = 0x22222222;
2505 ptr_cmd[i++] = 0x22222222;
2506 ptr_cmd[i++] = 0x22222222;
2507 ptr_cmd[i++] = 0x22222222;
2508
2509 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2510 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2511 ptr_cmd[i++] = 0x215;
2512 ptr_cmd[i++] = 0;
2513
2514 /* dispatch direct command */
2515 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2516 ptr_cmd[i++] = 0x10;
2517 ptr_cmd[i++] = 1;
2518 ptr_cmd[i++] = 1;
2519 ptr_cmd[i++] = 1;
2520
2521 while (i & 7)
2522 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2523
2524 resources[0] = bo_dst;
2525 resources[1] = bo_shader;
2526 resources[2] = bo_cmd;
2527 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2528 CU_ASSERT_EQUAL(r, 0);
2529
2530 ib_info.ib_mc_address = mc_address_cmd;
2531 ib_info.size = i;
2532 ibs_request.ip_type = ip_type;
2533 ibs_request.ring = ring;
2534 ibs_request.resources = bo_list;
2535 ibs_request.number_of_ibs = 1;
2536 ibs_request.ibs = &ib_info;
2537 ibs_request.fence_info.handle = NULL;
2538
2539 /* submit CS */
2540 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2541 CU_ASSERT_EQUAL(r, 0);
2542
2543 r = amdgpu_bo_list_destroy(bo_list);
2544 CU_ASSERT_EQUAL(r, 0);
2545
2546 fence_status.ip_type = ip_type;
2547 fence_status.ip_instance = 0;
2548 fence_status.ring = ring;
2549 fence_status.context = context_handle;
2550 fence_status.fence = ibs_request.seq_no;
2551
2552 	/* wait for the IB to complete */
2553 r = amdgpu_cs_query_fence_status(&fence_status,
2554 AMDGPU_TIMEOUT_INFINITE,
2555 0, &expired);
2556 CU_ASSERT_EQUAL(r, 0);
2557 CU_ASSERT_EQUAL(expired, true);
2558
2559 	/* verify that the memset result matches the expected value */
2560 i = 0;
2561 while(i < bo_dst_size) {
2562 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2563 }
2564
2565 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2566 CU_ASSERT_EQUAL(r, 0);
2567
2568 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2569 CU_ASSERT_EQUAL(r, 0);
2570
2571 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2572 CU_ASSERT_EQUAL(r, 0);
2573
2574 r = amdgpu_cs_ctx_free(context_handle);
2575 CU_ASSERT_EQUAL(r, 0);
2576 }
2577
2578 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2579 uint32_t ip_type,
2580 uint32_t ring,
2581 int hang)
2582 {
2583 amdgpu_context_handle context_handle;
2584 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2585 volatile unsigned char *ptr_dst;
2586 void *ptr_shader;
2587 unsigned char *ptr_src;
2588 uint32_t *ptr_cmd;
2589 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2590 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2591 int i, r;
2592 int bo_dst_size = 16384;
2593 int bo_shader_size = 4096;
2594 int bo_cmd_size = 4096;
2595 struct amdgpu_cs_request ibs_request = {0};
2596 	struct amdgpu_cs_ib_info ib_info = {0};
2597 uint32_t expired, hang_state, hangs;
2598 enum cs_type cs_type;
2599 amdgpu_bo_list_handle bo_list;
2600 struct amdgpu_cs_fence fence_status = {0};
2601
2602 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2603 CU_ASSERT_EQUAL(r, 0);
2604
2605 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2606 AMDGPU_GEM_DOMAIN_GTT, 0,
2607 &bo_cmd, (void **)&ptr_cmd,
2608 &mc_address_cmd, &va_cmd);
2609 CU_ASSERT_EQUAL(r, 0);
2610 memset(ptr_cmd, 0, bo_cmd_size);
2611
2612 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2613 AMDGPU_GEM_DOMAIN_VRAM, 0,
2614 &bo_shader, &ptr_shader,
2615 &mc_address_shader, &va_shader);
2616 CU_ASSERT_EQUAL(r, 0);
2617 memset(ptr_shader, 0, bo_shader_size);
2618
2619 cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2620 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2621 CU_ASSERT_EQUAL(r, 0);
2622
2623 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2624 AMDGPU_GEM_DOMAIN_VRAM, 0,
2625 &bo_src, (void **)&ptr_src,
2626 &mc_address_src, &va_src);
2627 CU_ASSERT_EQUAL(r, 0);
2628
2629 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2630 AMDGPU_GEM_DOMAIN_VRAM, 0,
2631 &bo_dst, (void **)&ptr_dst,
2632 &mc_address_dst, &va_dst);
2633 CU_ASSERT_EQUAL(r, 0);
2634
2635 memset(ptr_src, 0x55, bo_dst_size);
2636
2637 i = 0;
2638 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2639
2640 /* Issue commands to set cu mask used in current dispatch */
2641 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2642
2643 /* Writes shader state to HW */
2644 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2645
2646 /* Write constant data */
2647 /* Writes the texture resource constants data to the SGPRs */
2648 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2649 ptr_cmd[i++] = 0x240;
2650 ptr_cmd[i++] = mc_address_src;
2651 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2652 ptr_cmd[i++] = 0x400;
2653 ptr_cmd[i++] = 0x74fac;
2654
2655 /* Writes the UAV constant data to the SGPRs. */
2656 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2657 ptr_cmd[i++] = 0x244;
2658 ptr_cmd[i++] = mc_address_dst;
2659 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2660 ptr_cmd[i++] = 0x400;
2661 ptr_cmd[i++] = 0x74fac;
2662
2663 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2664 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2665 ptr_cmd[i++] = 0x215;
2666 ptr_cmd[i++] = 0;
2667
2668 /* dispatch direct command */
2669 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2670 ptr_cmd[i++] = 0x10;
2671 ptr_cmd[i++] = 1;
2672 ptr_cmd[i++] = 1;
2673 ptr_cmd[i++] = 1;
2674
2675 while (i & 7)
2676 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2677
2678 resources[0] = bo_shader;
2679 resources[1] = bo_src;
2680 resources[2] = bo_dst;
2681 resources[3] = bo_cmd;
2682 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2683 CU_ASSERT_EQUAL(r, 0);
2684
2685 ib_info.ib_mc_address = mc_address_cmd;
2686 ib_info.size = i;
2687 ibs_request.ip_type = ip_type;
2688 ibs_request.ring = ring;
2689 ibs_request.resources = bo_list;
2690 ibs_request.number_of_ibs = 1;
2691 ibs_request.ibs = &ib_info;
2692 ibs_request.fence_info.handle = NULL;
2693 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2694 CU_ASSERT_EQUAL(r, 0);
2695
2696 fence_status.ip_type = ip_type;
2697 fence_status.ip_instance = 0;
2698 fence_status.ring = ring;
2699 fence_status.context = context_handle;
2700 fence_status.fence = ibs_request.seq_no;
2701
2702 	/* wait for the IB to complete */
2703 r = amdgpu_cs_query_fence_status(&fence_status,
2704 AMDGPU_TIMEOUT_INFINITE,
2705 0, &expired);
2706
2707 if (!hang) {
2708 CU_ASSERT_EQUAL(r, 0);
2709 CU_ASSERT_EQUAL(expired, true);
2710
2711 		/* verify that the memcpy result matches the expected value */
2712 i = 0;
2713 while(i < bo_dst_size) {
2714 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2715 i++;
2716 }
2717 } else {
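 		/* after a deliberate hang the context is expected to report a GPU reset */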
2718 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2719 CU_ASSERT_EQUAL(r, 0);
2720 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2721 }
2722
2723 r = amdgpu_bo_list_destroy(bo_list);
2724 CU_ASSERT_EQUAL(r, 0);
2725
2726 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2727 CU_ASSERT_EQUAL(r, 0);
2728 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2729 CU_ASSERT_EQUAL(r, 0);
2730
2731 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2732 CU_ASSERT_EQUAL(r, 0);
2733
2734 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2735 CU_ASSERT_EQUAL(r, 0);
2736
2737 r = amdgpu_cs_ctx_free(context_handle);
2738 CU_ASSERT_EQUAL(r, 0);
2739 }
2740
2741 static void amdgpu_compute_dispatch_test(void)
2742 {
2743 int r;
2744 struct drm_amdgpu_info_hw_ip info;
2745 uint32_t ring_id;
2746
2747 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2748 CU_ASSERT_EQUAL(r, 0);
2749 if (!info.available_rings)
2750 printf("SKIP ... as there's no compute ring\n");
2751
2752 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2753 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2754 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2755 }
2756 }
2757
2758 static void amdgpu_gfx_dispatch_test(void)
2759 {
2760 int r;
2761 struct drm_amdgpu_info_hw_ip info;
2762 uint32_t ring_id;
2763
2764 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2765 CU_ASSERT_EQUAL(r, 0);
2766 if (!info.available_rings)
2767 printf("SKIP ... as there's no graphics ring\n");
2768
2769 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2770 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2771 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2772 }
2773 }
2774
2775 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2776 {
2777 int r;
2778 struct drm_amdgpu_info_hw_ip info;
2779 uint32_t ring_id;
2780
2781 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2782 CU_ASSERT_EQUAL(r, 0);
2783 if (!info.available_rings)
2784 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2785
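 	/* good dispatch, then a hanging dispatch to trigger a GPU reset, then another good dispatch to verify recovery */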
2786 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2787 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2788 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2789 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2790 }
2791 }
2792
2793 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2794 uint32_t ip_type, uint32_t ring)
2795 {
2796 amdgpu_context_handle context_handle;
2797 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2798 volatile unsigned char *ptr_dst;
2799 void *ptr_shader;
2800 unsigned char *ptr_src;
2801 uint32_t *ptr_cmd;
2802 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2803 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2804 int i, r;
2805 int bo_dst_size = 0x4000000;
2806 int bo_shader_size = 0x400000;
2807 int bo_cmd_size = 4096;
2808 struct amdgpu_cs_request ibs_request = {0};
2809 	struct amdgpu_cs_ib_info ib_info = {0};
2810 uint32_t hang_state, hangs, expired;
2811 struct amdgpu_gpu_info gpu_info = {0};
2812 amdgpu_bo_list_handle bo_list;
2813 struct amdgpu_cs_fence fence_status = {0};
2814
2815 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2816 CU_ASSERT_EQUAL(r, 0);
2817
2818 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2819 CU_ASSERT_EQUAL(r, 0);
2820
2821 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2822 AMDGPU_GEM_DOMAIN_GTT, 0,
2823 &bo_cmd, (void **)&ptr_cmd,
2824 &mc_address_cmd, &va_cmd);
2825 CU_ASSERT_EQUAL(r, 0);
2826 memset(ptr_cmd, 0, bo_cmd_size);
2827
2828 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2829 AMDGPU_GEM_DOMAIN_VRAM, 0,
2830 &bo_shader, &ptr_shader,
2831 &mc_address_shader, &va_shader);
2832 CU_ASSERT_EQUAL(r, 0);
2833 memset(ptr_shader, 0, bo_shader_size);
2834
2835 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2836 CU_ASSERT_EQUAL(r, 0);
2837
2838 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2839 AMDGPU_GEM_DOMAIN_VRAM, 0,
2840 &bo_src, (void **)&ptr_src,
2841 &mc_address_src, &va_src);
2842 CU_ASSERT_EQUAL(r, 0);
2843
2844 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2845 AMDGPU_GEM_DOMAIN_VRAM, 0,
2846 &bo_dst, (void **)&ptr_dst,
2847 &mc_address_dst, &va_dst);
2848 CU_ASSERT_EQUAL(r, 0);
2849
2850 memset(ptr_src, 0x55, bo_dst_size);
2851
2852 i = 0;
2853 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2854
2855 /* Issue commands to set cu mask used in current dispatch */
2856 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2857
2858 /* Writes shader state to HW */
2859 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2860
2861 /* Write constant data */
2862 /* Writes the texture resource constants data to the SGPRs */
2863 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2864 ptr_cmd[i++] = 0x240;
2865 ptr_cmd[i++] = mc_address_src;
2866 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2867 ptr_cmd[i++] = 0x400000;
2868 ptr_cmd[i++] = 0x74fac;
2869
2870 /* Writes the UAV constant data to the SGPRs. */
2871 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2872 ptr_cmd[i++] = 0x244;
2873 ptr_cmd[i++] = mc_address_dst;
2874 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2875 ptr_cmd[i++] = 0x400000;
2876 ptr_cmd[i++] = 0x74fac;
2877
2878 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2879 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2880 ptr_cmd[i++] = 0x215;
2881 ptr_cmd[i++] = 0;
2882
2883 /* dispatch direct command */
2884 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2885 ptr_cmd[i++] = 0x10000;
2886 ptr_cmd[i++] = 1;
2887 ptr_cmd[i++] = 1;
2888 ptr_cmd[i++] = 1;
2889
2890 while (i & 7)
2891 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2892
2893 resources[0] = bo_shader;
2894 resources[1] = bo_src;
2895 resources[2] = bo_dst;
2896 resources[3] = bo_cmd;
2897 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2898 CU_ASSERT_EQUAL(r, 0);
2899
2900 ib_info.ib_mc_address = mc_address_cmd;
2901 ib_info.size = i;
2902 ibs_request.ip_type = ip_type;
2903 ibs_request.ring = ring;
2904 ibs_request.resources = bo_list;
2905 ibs_request.number_of_ibs = 1;
2906 ibs_request.ibs = &ib_info;
2907 ibs_request.fence_info.handle = NULL;
2908 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2909 CU_ASSERT_EQUAL(r, 0);
2910
2911 fence_status.ip_type = ip_type;
2912 fence_status.ip_instance = 0;
2913 fence_status.ring = ring;
2914 fence_status.context = context_handle;
2915 fence_status.fence = ibs_request.seq_no;
2916
2917 	/* wait for the IB to complete */
2918 r = amdgpu_cs_query_fence_status(&fence_status,
2919 AMDGPU_TIMEOUT_INFINITE,
2920 0, &expired);
2921
2922 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2923 CU_ASSERT_EQUAL(r, 0);
2924 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2925
2926 r = amdgpu_bo_list_destroy(bo_list);
2927 CU_ASSERT_EQUAL(r, 0);
2928
2929 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2930 CU_ASSERT_EQUAL(r, 0);
2931 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2932 CU_ASSERT_EQUAL(r, 0);
2933
2934 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2935 CU_ASSERT_EQUAL(r, 0);
2936
2937 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2938 CU_ASSERT_EQUAL(r, 0);
2939
2940 r = amdgpu_cs_ctx_free(context_handle);
2941 CU_ASSERT_EQUAL(r, 0);
2942 }
2943
2944 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2945 {
2946 int r;
2947 struct drm_amdgpu_info_hw_ip info;
2948 uint32_t ring_id;
2949
2950 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2951 CU_ASSERT_EQUAL(r, 0);
2952 if (!info.available_rings)
2953 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2954
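 	/* good dispatch, then a slow-hanging dispatch to trigger a GPU reset, then another good dispatch to verify recovery */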
2955 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2956 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2957 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2958 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2959 }
2960 }
2961
2962 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2963 {
2964 struct amdgpu_test_shader *shader;
2965 int i, loop = 0x40000;
2966
2967 switch (family) {
2968 case AMDGPU_FAMILY_AI:
2969 case AMDGPU_FAMILY_RV:
2970 shader = &memcpy_ps_hang_slow_ai;
2971 break;
2972 default:
2973 return -1;
2974 break;
2975 }
2976
2977 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2978
2979 for (i = 0; i < loop; i++)
2980 memcpy(ptr + shader->header_length + shader->body_length * i,
2981 shader->shader + shader->header_length,
2982 shader->body_length * sizeof(uint32_t));
2983
2984 memcpy(ptr + shader->header_length + shader->body_length * loop,
2985 shader->shader + shader->header_length + shader->body_length,
2986 shader->foot_length * sizeof(uint32_t));
2987
2988 return 0;
2989 }
2990
2991 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2992 {
2993 int i;
2994 	uint32_t shader_offset = 256;
2995 uint32_t mem_offset, patch_code_offset;
2996 uint32_t shader_size, patchinfo_code_size;
2997 const uint32_t *shader;
2998 const uint32_t *patchinfo_code;
2999 const uint32_t *patchcode_offset;
3000
3001 switch (ps_type) {
3002 case PS_CONST:
3003 shader = ps_const_shader_gfx9;
3004 shader_size = sizeof(ps_const_shader_gfx9);
3005 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3006 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3007 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3008 break;
3009 case PS_TEX:
3010 shader = ps_tex_shader_gfx9;
3011 shader_size = sizeof(ps_tex_shader_gfx9);
3012 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3013 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3014 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3015 break;
3016 case PS_HANG:
3017 shader = memcpy_ps_hang;
3018 shader_size = sizeof(memcpy_ps_hang);
3019
3020 memcpy(ptr, shader, shader_size);
3021 return 0;
3022 default:
3023 return -1;
3024 break;
3025 }
3026
3027 /* write main shader program */
3028 for (i = 0 ; i < 10; i++) {
3029 mem_offset = i * shader_offset;
3030 memcpy(ptr + mem_offset, shader, shader_size);
3031 }
3032
3033 /* overwrite patch codes */
3034 for (i = 0 ; i < 10; i++) {
3035 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3036 patch_code_offset = i * patchinfo_code_size;
3037 memcpy(ptr + mem_offset,
3038 patchinfo_code + patch_code_offset,
3039 patchinfo_code_size * sizeof(uint32_t));
3040 }
3041
3042 return 0;
3043 }
3044
3045 /* load RectPosTexFast_VS */
3046 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3047 {
3048 const uint32_t *shader;
3049 uint32_t shader_size;
3050
3051 shader = vs_RectPosTexFast_shader_gfx9;
3052 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3053
3054 memcpy(ptr, shader, shader_size);
3055
3056 return 0;
3057 }
3058
3059 static int amdgpu_draw_init(uint32_t *ptr)
3060 {
3061 int i = 0;
3062 const uint32_t *preamblecache_ptr;
3063 uint32_t preamblecache_size;
3064
3065 /* Write context control and load shadowing register if necessary */
3066 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3067 ptr[i++] = 0x80000000;
3068 ptr[i++] = 0x80000000;
3069
3070 preamblecache_ptr = preamblecache_gfx9;
3071 preamblecache_size = sizeof(preamblecache_gfx9);
3072
3073 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3074 return i + preamblecache_size/sizeof(uint32_t);
3075 }
3076
3077 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3078 uint64_t dst_addr,
3079 int hang_slow)
3080 {
3081 int i = 0;
3082
3083 /* setup color buffer */
3084 /* offset reg
3085 0xA318 CB_COLOR0_BASE
3086 0xA319 CB_COLOR0_BASE_EXT
3087 0xA31A CB_COLOR0_ATTRIB2
3088 0xA31B CB_COLOR0_VIEW
3089 0xA31C CB_COLOR0_INFO
3090 0xA31D CB_COLOR0_ATTRIB
3091 0xA31E CB_COLOR0_DCC_CONTROL
3092 0xA31F CB_COLOR0_CMASK
3093 0xA320 CB_COLOR0_CMASK_BASE_EXT
3094 0xA321 CB_COLOR0_FMASK
3095 0xA322 CB_COLOR0_FMASK_BASE_EXT
3096 0xA323 CB_COLOR0_CLEAR_WORD0
3097 0xA324 CB_COLOR0_CLEAR_WORD1
3098 0xA325 CB_COLOR0_DCC_BASE
3099 0xA326 CB_COLOR0_DCC_BASE_EXT */
3100 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3101 ptr[i++] = 0x318;
3102 ptr[i++] = dst_addr >> 8;
3103 ptr[i++] = dst_addr >> 40;
3104 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3105 ptr[i++] = 0;
3106 ptr[i++] = 0x50438;
3107 ptr[i++] = 0x10140000;
3108 i += 9;
3109
3110 /* mmCB_MRT0_EPITCH */
3111 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3112 ptr[i++] = 0x1e8;
3113 ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3114
3115 	/* 0xA32B CB_COLOR1_INFO */
3116 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3117 ptr[i++] = 0x32b;
3118 ptr[i++] = 0;
3119
3120 	/* 0xA33A CB_COLOR2_INFO */
3121 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3122 ptr[i++] = 0x33a;
3123 ptr[i++] = 0;
3124
3125 /* SPI_SHADER_COL_FORMAT */
3126 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3127 ptr[i++] = 0x1c5;
3128 ptr[i++] = 9;
3129
3130 /* Setup depth buffer */
3131 /* mmDB_Z_INFO */
3132 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3133 ptr[i++] = 0xe;
3134 i += 2;
3135
3136 return i;
3137 }
3138
3139 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3140 {
3141 int i = 0;
3142 const uint32_t *cached_cmd_ptr;
3143 uint32_t cached_cmd_size;
3144
3145 /* mmPA_SC_TILE_STEERING_OVERRIDE */
3146 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3147 ptr[i++] = 0xd7;
3148 ptr[i++] = 0;
3149
3150 ptr[i++] = 0xffff1000;
3151 ptr[i++] = 0xc0021000;
3152
3153 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3154 ptr[i++] = 0xd7;
3155 ptr[i++] = 1;
3156
3157 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3158 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3159 ptr[i++] = 0x2fe;
3160 i += 16;
3161
3162 /* mmPA_SC_CENTROID_PRIORITY_0 */
3163 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3164 ptr[i++] = 0x2f5;
3165 i += 2;
3166
3167 cached_cmd_ptr = cached_cmd_gfx9;
3168 cached_cmd_size = sizeof(cached_cmd_gfx9);
3169
3170 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3171 if (hang_slow)
3172 *(ptr + i + 12) = 0x8000800;
3173 i += cached_cmd_size/sizeof(uint32_t);
3174
3175 return i;
3176 }
3177
3178 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3179 int ps_type,
3180 uint64_t shader_addr,
3181 int hang_slow)
3182 {
3183 int i = 0;
3184
3185 /* mmPA_CL_VS_OUT_CNTL */
3186 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3187 ptr[i++] = 0x207;
3188 ptr[i++] = 0;
3189
3190 /* mmSPI_SHADER_PGM_RSRC3_VS */
3191 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3192 ptr[i++] = 0x46;
3193 ptr[i++] = 0xffff;
3194
3195 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3196 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3197 ptr[i++] = 0x48;
3198 ptr[i++] = shader_addr >> 8;
3199 ptr[i++] = shader_addr >> 40;
3200
3201 /* mmSPI_SHADER_PGM_RSRC1_VS */
3202 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3203 ptr[i++] = 0x4a;
3204 ptr[i++] = 0xc0081;
3205 /* mmSPI_SHADER_PGM_RSRC2_VS */
3206 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3207 ptr[i++] = 0x4b;
3208 ptr[i++] = 0x18;
3209
3210 /* mmSPI_VS_OUT_CONFIG */
3211 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3212 ptr[i++] = 0x1b1;
3213 ptr[i++] = 2;
3214
3215 /* mmSPI_SHADER_POS_FORMAT */
3216 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3217 ptr[i++] = 0x1c3;
3218 ptr[i++] = 4;
3219
3220 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3221 ptr[i++] = 0x4c;
3222 i += 2;
3223 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3224 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3225
3226 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3227 ptr[i++] = 0x50;
3228 i += 2;
3229 if (ps_type == PS_CONST) {
3230 i += 2;
3231 } else if (ps_type == PS_TEX) {
3232 ptr[i++] = 0x3f800000;
3233 ptr[i++] = 0x3f800000;
3234 }
3235
3236 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3237 ptr[i++] = 0x54;
3238 i += 4;
3239
3240 return i;
3241 }
3242
3243 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3244 int ps_type,
3245 uint64_t shader_addr)
3246 {
3247 int i, j;
3248 const uint32_t *sh_registers;
3249 const uint32_t *context_registers;
3250 uint32_t num_sh_reg, num_context_reg;
3251
3252 if (ps_type == PS_CONST) {
3253 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3254 context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3255 num_sh_reg = ps_num_sh_registers_gfx9;
3256 num_context_reg = ps_num_context_registers_gfx9;
3257 } else if (ps_type == PS_TEX) {
3258 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3259 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3260 num_sh_reg = ps_num_sh_registers_gfx9;
3261 num_context_reg = ps_num_context_registers_gfx9;
3262 }
3263
3264 i = 0;
3265
3266 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
3267 0x2c08 SPI_SHADER_PGM_LO_PS
3268 0x2c09 SPI_SHADER_PGM_HI_PS */
3269 shader_addr += 256 * 9;
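 	/* point at the last of the 10 shader copies written by amdgpu_draw_load_ps_shader (256-byte stride) */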
3270 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3271 ptr[i++] = 0x7;
3272 ptr[i++] = 0xffff;
3273 ptr[i++] = shader_addr >> 8;
3274 ptr[i++] = shader_addr >> 40;
3275
3276 for (j = 0; j < num_sh_reg; j++) {
3277 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3278 ptr[i++] = sh_registers[j * 2] - 0x2c00;
3279 ptr[i++] = sh_registers[j * 2 + 1];
3280 }
3281
3282 for (j = 0; j < num_context_reg; j++) {
3283 if (context_registers[j * 2] != 0xA1C5) {
3284 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3285 ptr[i++] = context_registers[j * 2] - 0xa000;
3286 ptr[i++] = context_registers[j * 2 + 1];
3287 }
3288
3289 if (context_registers[j * 2] == 0xA1B4) {
3290 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3291 ptr[i++] = 0x1b3;
3292 ptr[i++] = 2;
3293 }
3294 }
3295
3296 return i;
3297 }
3298
3299 static int amdgpu_draw_draw(uint32_t *ptr)
3300 {
3301 int i = 0;
3302
3303 /* mmIA_MULTI_VGT_PARAM */
3304 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3305 ptr[i++] = 0x40000258;
3306 ptr[i++] = 0xd00ff;
3307
3308 /* mmVGT_PRIMITIVE_TYPE */
3309 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3310 ptr[i++] = 0x10000242;
3311 ptr[i++] = 0x11;
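 	/* 0x11 = DI_PT_RECTLIST */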
3312
3313 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3314 ptr[i++] = 3;
3315 ptr[i++] = 2;
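 	/* the two dwords above are the vertex count (3) and the draw initiator (2, auto-generated indices) */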
3316
3317 return i;
3318 }
3319
3320 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3321 amdgpu_bo_handle bo_shader_ps,
3322 amdgpu_bo_handle bo_shader_vs,
3323 uint64_t mc_address_shader_ps,
3324 uint64_t mc_address_shader_vs,
3325 uint32_t ring_id)
3326 {
3327 amdgpu_context_handle context_handle;
3328 amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3329 volatile unsigned char *ptr_dst;
3330 uint32_t *ptr_cmd;
3331 uint64_t mc_address_dst, mc_address_cmd;
3332 amdgpu_va_handle va_dst, va_cmd;
3333 int i, r;
3334 int bo_dst_size = 16384;
3335 int bo_cmd_size = 4096;
3336 struct amdgpu_cs_request ibs_request = {0};
3337 struct amdgpu_cs_ib_info ib_info = {0};
3338 struct amdgpu_cs_fence fence_status = {0};
3339 uint32_t expired;
3340 amdgpu_bo_list_handle bo_list;
3341
3342 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3343 CU_ASSERT_EQUAL(r, 0);
3344
3345 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3346 AMDGPU_GEM_DOMAIN_GTT, 0,
3347 &bo_cmd, (void **)&ptr_cmd,
3348 &mc_address_cmd, &va_cmd);
3349 CU_ASSERT_EQUAL(r, 0);
3350 memset(ptr_cmd, 0, bo_cmd_size);
3351
3352 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3353 AMDGPU_GEM_DOMAIN_VRAM, 0,
3354 &bo_dst, (void **)&ptr_dst,
3355 &mc_address_dst, &va_dst);
3356 CU_ASSERT_EQUAL(r, 0);
3357
3358 i = 0;
3359 i += amdgpu_draw_init(ptr_cmd + i);
3360
3361 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3362
3363 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3364
3365 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3366
3367 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3368
3369 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3370 ptr_cmd[i++] = 0xc;
3371 ptr_cmd[i++] = 0x33333333;
3372 ptr_cmd[i++] = 0x33333333;
3373 ptr_cmd[i++] = 0x33333333;
3374 ptr_cmd[i++] = 0x33333333;
3375
3376 i += amdgpu_draw_draw(ptr_cmd + i);
3377
3378 while (i & 7)
3379 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3380
3381 resources[0] = bo_dst;
3382 resources[1] = bo_shader_ps;
3383 resources[2] = bo_shader_vs;
3384 resources[3] = bo_cmd;
3385 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3386 CU_ASSERT_EQUAL(r, 0);
3387
3388 ib_info.ib_mc_address = mc_address_cmd;
3389 ib_info.size = i;
3390 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3391 ibs_request.ring = ring_id;
3392 ibs_request.resources = bo_list;
3393 ibs_request.number_of_ibs = 1;
3394 ibs_request.ibs = &ib_info;
3395 ibs_request.fence_info.handle = NULL;
3396
3397 /* submit CS */
3398 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3399 CU_ASSERT_EQUAL(r, 0);
3400
3401 r = amdgpu_bo_list_destroy(bo_list);
3402 CU_ASSERT_EQUAL(r, 0);
3403
3404 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3405 fence_status.ip_instance = 0;
3406 fence_status.ring = ring_id;
3407 fence_status.context = context_handle;
3408 fence_status.fence = ibs_request.seq_no;
3409
3410 	/* wait for the IB to complete */
3411 r = amdgpu_cs_query_fence_status(&fence_status,
3412 AMDGPU_TIMEOUT_INFINITE,
3413 0, &expired);
3414 CU_ASSERT_EQUAL(r, 0);
3415 CU_ASSERT_EQUAL(expired, true);
3416
3417 	/* verify that the memset result matches the expected value */
3418 i = 0;
3419 while(i < bo_dst_size) {
3420 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3421 }
3422
3423 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3424 CU_ASSERT_EQUAL(r, 0);
3425
3426 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3427 CU_ASSERT_EQUAL(r, 0);
3428
3429 r = amdgpu_cs_ctx_free(context_handle);
3430 CU_ASSERT_EQUAL(r, 0);
3431 }
3432
3433 static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3434 uint32_t ring)
3435 {
3436 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3437 void *ptr_shader_ps;
3438 void *ptr_shader_vs;
3439 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3440 amdgpu_va_handle va_shader_ps, va_shader_vs;
3441 int r;
3442 int bo_shader_size = 4096;
3443
3444 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3445 AMDGPU_GEM_DOMAIN_VRAM, 0,
3446 &bo_shader_ps, &ptr_shader_ps,
3447 &mc_address_shader_ps, &va_shader_ps);
3448 CU_ASSERT_EQUAL(r, 0);
3449 memset(ptr_shader_ps, 0, bo_shader_size);
3450
3451 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3452 AMDGPU_GEM_DOMAIN_VRAM, 0,
3453 &bo_shader_vs, &ptr_shader_vs,
3454 &mc_address_shader_vs, &va_shader_vs);
3455 CU_ASSERT_EQUAL(r, 0);
3456 memset(ptr_shader_vs, 0, bo_shader_size);
3457
3458 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3459 CU_ASSERT_EQUAL(r, 0);
3460
3461 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3462 CU_ASSERT_EQUAL(r, 0);
3463
3464 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3465 mc_address_shader_ps, mc_address_shader_vs, ring);
3466
3467 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3468 CU_ASSERT_EQUAL(r, 0);
3469
3470 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3471 CU_ASSERT_EQUAL(r, 0);
3472 }
3473
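/* Draw a textured rectangle that samples bo_src into bo_dst using the
 * caller-provided shaders.  With hang == 0 the copied data is verified
 * byte-by-byte; with hang != 0 the context is instead expected to report
 * AMDGPU_CTX_UNKNOWN_RESET. */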
3474 static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3475 amdgpu_bo_handle bo_shader_ps,
3476 amdgpu_bo_handle bo_shader_vs,
3477 uint64_t mc_address_shader_ps,
3478 uint64_t mc_address_shader_vs,
3479 uint32_t ring, int hang)
3480 {
3481 amdgpu_context_handle context_handle;
3482 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3483 volatile unsigned char *ptr_dst;
3484 unsigned char *ptr_src;
3485 uint32_t *ptr_cmd;
3486 uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3487 amdgpu_va_handle va_dst, va_src, va_cmd;
3488 int i, r;
3489 int bo_size = 16384;
3490 int bo_cmd_size = 4096;
3491 struct amdgpu_cs_request ibs_request = {0};
3492 	struct amdgpu_cs_ib_info ib_info = {0};
3493 uint32_t hang_state, hangs;
3494 uint32_t expired;
3495 amdgpu_bo_list_handle bo_list;
3496 struct amdgpu_cs_fence fence_status = {0};
3497
3498 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3499 CU_ASSERT_EQUAL(r, 0);
3500
3501 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3502 AMDGPU_GEM_DOMAIN_GTT, 0,
3503 &bo_cmd, (void **)&ptr_cmd,
3504 &mc_address_cmd, &va_cmd);
3505 CU_ASSERT_EQUAL(r, 0);
3506 memset(ptr_cmd, 0, bo_cmd_size);
3507
3508 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3509 AMDGPU_GEM_DOMAIN_VRAM, 0,
3510 &bo_src, (void **)&ptr_src,
3511 &mc_address_src, &va_src);
3512 CU_ASSERT_EQUAL(r, 0);
3513
3514 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3515 AMDGPU_GEM_DOMAIN_VRAM, 0,
3516 &bo_dst, (void **)&ptr_dst,
3517 &mc_address_dst, &va_dst);
3518 CU_ASSERT_EQUAL(r, 0);
3519
3520 memset(ptr_src, 0x55, bo_size);
3521
3522 i = 0;
3523 i += amdgpu_draw_init(ptr_cmd + i);
3524
3525 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3526
3527 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3528
3529 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
3530
3531 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3532
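	/* User data at SH offset 0xc: the source buffer's GPU address (>> 8)
	 * followed by hard-coded size/format descriptor dwords that let the
	 * PS_TEX shader sample bo_src; the trailing dwords are left zero. */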
3533 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3534 ptr_cmd[i++] = 0xc;
3535 ptr_cmd[i++] = mc_address_src >> 8;
3536 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3537 ptr_cmd[i++] = 0x7c01f;
3538 ptr_cmd[i++] = 0x90500fac;
3539 ptr_cmd[i++] = 0x3e000;
3540 i += 3;
3541
3542 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3543 ptr_cmd[i++] = 0x14;
3544 ptr_cmd[i++] = 0x92;
3545 i += 3;
3546
3547 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3548 ptr_cmd[i++] = 0x191;
3549 ptr_cmd[i++] = 0;
3550
3551 i += amdgpu_draw_draw(ptr_cmd + i);
3552
3553 while (i & 7)
3554 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3555
3556 resources[0] = bo_dst;
3557 resources[1] = bo_src;
3558 resources[2] = bo_shader_ps;
3559 resources[3] = bo_shader_vs;
3560 resources[4] = bo_cmd;
3561 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3562 CU_ASSERT_EQUAL(r, 0);
3563
3564 ib_info.ib_mc_address = mc_address_cmd;
3565 ib_info.size = i;
3566 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3567 ibs_request.ring = ring;
3568 ibs_request.resources = bo_list;
3569 ibs_request.number_of_ibs = 1;
3570 ibs_request.ibs = &ib_info;
3571 ibs_request.fence_info.handle = NULL;
3572 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3573 CU_ASSERT_EQUAL(r, 0);
3574
3575 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3576 fence_status.ip_instance = 0;
3577 fence_status.ring = ring;
3578 fence_status.context = context_handle;
3579 fence_status.fence = ibs_request.seq_no;
3580
3581 	/* wait for the IB to complete */
3582 r = amdgpu_cs_query_fence_status(&fence_status,
3583 AMDGPU_TIMEOUT_INFINITE,
3584 0, &expired);
3585 if (!hang) {
3586 CU_ASSERT_EQUAL(r, 0);
3587 CU_ASSERT_EQUAL(expired, true);
3588
3589 		/* verify the memcpy result: destination must match the source */
3590 		i = 0;
3591 		while (i < bo_size) {
3592 			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3593 			i++;
3594 		}
3595 } else {
3596 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3597 CU_ASSERT_EQUAL(r, 0);
3598 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3599 }
3600
3601 r = amdgpu_bo_list_destroy(bo_list);
3602 CU_ASSERT_EQUAL(r, 0);
3603
3604 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3605 CU_ASSERT_EQUAL(r, 0);
3606 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3607 CU_ASSERT_EQUAL(r, 0);
3608
3609 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3610 CU_ASSERT_EQUAL(r, 0);
3611
3612 r = amdgpu_cs_ctx_free(context_handle);
3613 CU_ASSERT_EQUAL(r, 0);
3614 }
3615
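/* Load the vertex shader plus either the PS_TEX pixel shader or, when a hang
 * is requested, the PS_HANG shader, then run the memcpy draw on the given
 * ring. */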
3616 void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
3617 int hang)
3618 {
3619 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3620 void *ptr_shader_ps;
3621 void *ptr_shader_vs;
3622 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3623 amdgpu_va_handle va_shader_ps, va_shader_vs;
3624 int bo_shader_size = 4096;
3625 enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
3626 int r;
3627
3628 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3629 AMDGPU_GEM_DOMAIN_VRAM, 0,
3630 &bo_shader_ps, &ptr_shader_ps,
3631 &mc_address_shader_ps, &va_shader_ps);
3632 CU_ASSERT_EQUAL(r, 0);
3633 memset(ptr_shader_ps, 0, bo_shader_size);
3634
3635 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3636 AMDGPU_GEM_DOMAIN_VRAM, 0,
3637 &bo_shader_vs, &ptr_shader_vs,
3638 &mc_address_shader_vs, &va_shader_vs);
3639 CU_ASSERT_EQUAL(r, 0);
3640 memset(ptr_shader_vs, 0, bo_shader_size);
3641
3642 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
3643 CU_ASSERT_EQUAL(r, 0);
3644
3645 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3646 CU_ASSERT_EQUAL(r, 0);
3647
3648 amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
3649 mc_address_shader_ps, mc_address_shader_vs, ring, hang);
3650
3651 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3652 CU_ASSERT_EQUAL(r, 0);
3653
3654 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3655 CU_ASSERT_EQUAL(r, 0);
3656 }
3657
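/* Run the memset and memcpy draw tests once on every available GFX ring. */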
3658 static void amdgpu_draw_test(void)
3659 {
3660 int r;
3661 struct drm_amdgpu_info_hw_ip info;
3662 uint32_t ring_id;
3663
3664 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3665 CU_ASSERT_EQUAL(r, 0);
3666 if (!info.available_rings)
3667 printf("SKIP ... as there's no graphics ring\n");
3668
3669 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3670 amdgpu_memset_draw_test(device_handle, ring_id);
3671 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3672 }
3673 }
3674
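/* Slow-hang variant of the memcpy draw: a deliberately slow pixel shader and
 * a large (0x4000000-byte) copy keep the GFX ring busy long enough for the
 * kernel to detect a hang and reset the GPU, which the test then confirms via
 * the context's reset state. */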
3675 void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
3676 {
3677 amdgpu_context_handle context_handle;
3678 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3679 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3680 void *ptr_shader_ps;
3681 void *ptr_shader_vs;
3682 volatile unsigned char *ptr_dst;
3683 unsigned char *ptr_src;
3684 uint32_t *ptr_cmd;
3685 uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3686 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3687 amdgpu_va_handle va_shader_ps, va_shader_vs;
3688 amdgpu_va_handle va_dst, va_src, va_cmd;
3689 struct amdgpu_gpu_info gpu_info = {0};
3690 int i, r;
3691 int bo_size = 0x4000000;
3692 int bo_shader_ps_size = 0x400000;
3693 int bo_shader_vs_size = 4096;
3694 int bo_cmd_size = 4096;
3695 struct amdgpu_cs_request ibs_request = {0};
3696 	struct amdgpu_cs_ib_info ib_info = {0};
3697 uint32_t hang_state, hangs, expired;
3698 amdgpu_bo_list_handle bo_list;
3699 struct amdgpu_cs_fence fence_status = {0};
3700
3701 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3702 CU_ASSERT_EQUAL(r, 0);
3703
3704 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3705 CU_ASSERT_EQUAL(r, 0);
3706
3707 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3708 AMDGPU_GEM_DOMAIN_GTT, 0,
3709 &bo_cmd, (void **)&ptr_cmd,
3710 &mc_address_cmd, &va_cmd);
3711 CU_ASSERT_EQUAL(r, 0);
3712 memset(ptr_cmd, 0, bo_cmd_size);
3713
3714 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
3715 AMDGPU_GEM_DOMAIN_VRAM, 0,
3716 &bo_shader_ps, &ptr_shader_ps,
3717 &mc_address_shader_ps, &va_shader_ps);
3718 CU_ASSERT_EQUAL(r, 0);
3719 memset(ptr_shader_ps, 0, bo_shader_ps_size);
3720
3721 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
3722 AMDGPU_GEM_DOMAIN_VRAM, 0,
3723 &bo_shader_vs, &ptr_shader_vs,
3724 &mc_address_shader_vs, &va_shader_vs);
3725 CU_ASSERT_EQUAL(r, 0);
3726 memset(ptr_shader_vs, 0, bo_shader_vs_size);
3727
3728 r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
3729 CU_ASSERT_EQUAL(r, 0);
3730
3731 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3732 CU_ASSERT_EQUAL(r, 0);
3733
3734 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3735 AMDGPU_GEM_DOMAIN_VRAM, 0,
3736 &bo_src, (void **)&ptr_src,
3737 &mc_address_src, &va_src);
3738 CU_ASSERT_EQUAL(r, 0);
3739
3740 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3741 AMDGPU_GEM_DOMAIN_VRAM, 0,
3742 &bo_dst, (void **)&ptr_dst,
3743 &mc_address_dst, &va_dst);
3744 CU_ASSERT_EQUAL(r, 0);
3745
3746 memset(ptr_src, 0x55, bo_size);
3747
3748 i = 0;
3749 i += amdgpu_draw_init(ptr_cmd + i);
3750
3751 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);
3752
3753 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);
3754
3755 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
3756 mc_address_shader_vs, 1);
3757
3758 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3759
3760 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3761 ptr_cmd[i++] = 0xc;
3762 ptr_cmd[i++] = mc_address_src >> 8;
3763 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3764 ptr_cmd[i++] = 0x1ffc7ff;
3765 ptr_cmd[i++] = 0x90500fac;
3766 ptr_cmd[i++] = 0xffe000;
3767 i += 3;
3768
3769 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3770 ptr_cmd[i++] = 0x14;
3771 ptr_cmd[i++] = 0x92;
3772 i += 3;
3773
3774 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3775 ptr_cmd[i++] = 0x191;
3776 ptr_cmd[i++] = 0;
3777
3778 i += amdgpu_draw_draw(ptr_cmd + i);
3779
3780 while (i & 7)
3781 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3782
3783 resources[0] = bo_dst;
3784 resources[1] = bo_src;
3785 resources[2] = bo_shader_ps;
3786 resources[3] = bo_shader_vs;
3787 resources[4] = bo_cmd;
3788 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3789 CU_ASSERT_EQUAL(r, 0);
3790
3791 ib_info.ib_mc_address = mc_address_cmd;
3792 ib_info.size = i;
3793 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3794 ibs_request.ring = ring;
3795 ibs_request.resources = bo_list;
3796 ibs_request.number_of_ibs = 1;
3797 ibs_request.ibs = &ib_info;
3798 ibs_request.fence_info.handle = NULL;
3799 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3800 CU_ASSERT_EQUAL(r, 0);
3801
3802 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3803 fence_status.ip_instance = 0;
3804 fence_status.ring = ring;
3805 fence_status.context = context_handle;
3806 fence_status.fence = ibs_request.seq_no;
3807
3808 	/* wait for the IB to complete */
3809 r = amdgpu_cs_query_fence_status(&fence_status,
3810 AMDGPU_TIMEOUT_INFINITE,
3811 0, &expired);
3812
3813 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3814 CU_ASSERT_EQUAL(r, 0);
3815 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3816
3817 r = amdgpu_bo_list_destroy(bo_list);
3818 CU_ASSERT_EQUAL(r, 0);
3819
3820 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3821 CU_ASSERT_EQUAL(r, 0);
3822 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3823 CU_ASSERT_EQUAL(r, 0);
3824
3825 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3826 CU_ASSERT_EQUAL(r, 0);
3827
3828 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
3829 CU_ASSERT_EQUAL(r, 0);
3830 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
3831 CU_ASSERT_EQUAL(r, 0);
3832
3833 r = amdgpu_cs_ctx_free(context_handle);
3834 CU_ASSERT_EQUAL(r, 0);
3835 }
3836
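/* Trigger a GPU reset by reading the amdgpu_gpu_recover debugfs entry for the
 * device, check that an existing context then reports
 * AMDGPU_CTX_UNKNOWN_RESET, and rerun the dispatch tests to confirm the GPU
 * came back. */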
3837 static void amdgpu_gpu_reset_test(void)
3838 {
3839 int r;
3840 char debugfs_path[256], tmp[10];
3841 int fd;
3842 struct stat sbuf;
3843 amdgpu_context_handle context_handle;
3844 uint32_t hang_state, hangs;
3845
3846 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3847 CU_ASSERT_EQUAL(r, 0);
3848
3849 r = fstat(drm_amdgpu[0], &sbuf);
3850 CU_ASSERT_EQUAL(r, 0);
3851
3852 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3853 fd = open(debugfs_path, O_RDONLY);
3854 CU_ASSERT(fd >= 0);
3855
3856 r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3857 CU_ASSERT(r > 0);
3858
3859 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3860 CU_ASSERT_EQUAL(r, 0);
3861 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3862
3863 close(fd);
3864 r = amdgpu_cs_ctx_free(context_handle);
3865 CU_ASSERT_EQUAL(r, 0);
3866
3867 amdgpu_compute_dispatch_test();
3868 amdgpu_gfx_dispatch_test();
3869 }
3870