/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
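/* For example, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 * builds the header dword 0x00000001: opcode in bits [7:0], sub-opcode in
 * bits [15:8] and the extra field in bits [31:16].
 */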
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
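/* Note that GFX_COMPUTE_NOP (0xffff1000) is just PACKET3(PACKET3_NOP, 0x3fff),
 * i.e. a type-3 NOP with the maximum count field, while the SDMA NOP is the
 * plain zero opcode.
 */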

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define	PACKET2_PAD_SHIFT	0
#define	PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			(((op) & 0xFF) << 8) |	\
			((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
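/* For example, PACKET3(PACKET3_NOP, 14) evaluates to 0xc00e1000, the header
 * used below to pad an IB out to 16 dwords; PACKET3_COMPUTE() merely ORs in
 * the shader-type bit (bit 1) on top of the same encoding.
 */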

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)			((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR				(1 << 16)
#define		WR_CONFIRM				(1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)		((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)		((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#		define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#		define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#		define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#		define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#		define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#		define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#		define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#		define PACKET3_DMA_DATA_CP_SYNC	(1 << 31)
/* COMMAND */
#		define PACKET3_DMA_DATA_DIS_WC	(1 << 21)
#		define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#		define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#		define PACKET3_DMA_DATA_CMD_SAS	(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#		define PACKET3_DMA_DATA_CMD_DAS	(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#		define PACKET3_DMA_DATA_CMD_SAIC	(1 << 28)
#		define PACKET3_DMA_DATA_CMD_DAIC	(1 << 29)
#		define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |	\
						(((t) & 0x1) << 23) |	\
						(((s) & 0x1) << 22) |	\
						(((cnt) & 0xFFFFF) << 0))
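/* For example, SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, count) puts the
 * opcode in bits [31:28] and the transfer count in the low 20 bits.
 */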
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#		define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#		define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#		define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#		define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL                  0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)    (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)    (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)  (((unsigned)(x) & 0x1) << 31)
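
/* LOAD_ENABLE and SHADOW_ENABLE intentionally share bit 31: they apply to the
 * two separate payload dwords of CONTEXT_CONTROL (load control and shadow
 * control, respectively).
 */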

#define PKT3_CLEAR_STATE                      0x12

#define PKT3_SET_SH_REG                       0x76
#define	PACKET3_SET_SH_REG_START	0x00002c00

#define	PACKET3_DISPATCH_DIRECT		0x15
#define PACKET3_EVENT_WRITE		0x46
#define PACKET3_ACQUIRE_MEM		0x58
#define PACKET3_SET_CONTEXT_REG		0x69
#define PACKET3_SET_UCONFIG_REG		0x79
#define PACKET3_DRAW_INDEX_AUTO		0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07



#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
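
/* The shader binary below is written byte-swapped; SWAP_32() restores the
 * original instruction words at compile time.
 */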

/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */
static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};
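
/* shader_bin is the compiled machine code for the shader sketched above; the
 * sync dependency test copies it into an IB at CODE_OFFSET and uses
 * DATA_OFFSET for the value it writes back.
 */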

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
	0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
	0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000000},	//{ mmSPI_PS_IN_CONTROL,       0x00000000 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000001},	//{ mmSPI_PS_IN_CONTROL,       0x00000001 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};
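
/* The lengths are in dwords: the hang-slow tests build an oversized shader by
 * emitting the header once, repeating the body section many times, and closing
 * with the footer.
 */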

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
				unsigned alignment, unsigned heap,
				uint64_t alloc_flags, uint64_t mapping_flags,
				amdgpu_bo_handle *bo, void **cpu,
				uint64_t *mc_address,
				amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
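
/* A minimal usage sketch (a 4 KiB scratch buffer in GTT; error handling
 * omitted for brevity):
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	uint64_t mc;
 *	void *cpu;
 *
 *	if (!amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					 AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					 &bo, &cpu, &mc, &va))
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 */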

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

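	/* Build a two-IB submission: the constant engine (CE) IB initializes
	 * the CE/DE counters and the draw engine (DE) IB waits on the CE
	 * counter, exercising CE->DE synchronization within one submit.
	 */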
	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run all four combinations of cached and USWC GTT mappings */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy result matches the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * This helper submits the command stream described in ibs_request and waits
 * until the IB has completed.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the PM4 packet from the caller into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait until the IB has completed */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write-linear result matches the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while(loop < 2) {
			/* allocate UC bo for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the const-fill result matches the expected pattern */
			i = 0;
			while(i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1559 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1560 {
1561 const int sdma_write_length = 1024;
1562 const int pm4_dw = 256;
1563 amdgpu_context_handle context_handle;
1564 amdgpu_bo_handle bo1, bo2;
1565 amdgpu_bo_handle *resources;
1566 uint32_t *pm4;
1567 struct amdgpu_cs_ib_info *ib_info;
1568 struct amdgpu_cs_request *ibs_request;
1569 uint64_t bo1_mc, bo2_mc;
1570 volatile unsigned char *bo1_cpu, *bo2_cpu;
1571 int i, j, r, loop1, loop2, ring_id;
1572 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1573 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1574 struct drm_amdgpu_info_hw_ip hw_ip_info;
1575
1576 pm4 = calloc(pm4_dw, sizeof(*pm4));
1577 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1578
1579 ib_info = calloc(1, sizeof(*ib_info));
1580 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1581
1582 ibs_request = calloc(1, sizeof(*ibs_request));
1583 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1584
1585 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1586 CU_ASSERT_EQUAL(r, 0);
1587
1588 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1589 CU_ASSERT_EQUAL(r, 0);
1590
1591 /* prepare resource */
1592 resources = calloc(2, sizeof(amdgpu_bo_handle));
1593 CU_ASSERT_NOT_EQUAL(resources, NULL);
1594
1595 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1596 loop1 = loop2 = 0;
1597 /* run 9 circle to test all mapping combination */
1598 while(loop1 < 2) {
1599 while(loop2 < 2) {
1600 /* allocate UC bo1for sDMA use */
1601 r = amdgpu_bo_alloc_and_map(device_handle,
1602 sdma_write_length, 4096,
1603 AMDGPU_GEM_DOMAIN_GTT,
1604 gtt_flags[loop1], &bo1,
1605 (void**)&bo1_cpu, &bo1_mc,
1606 &bo1_va_handle);
1607 CU_ASSERT_EQUAL(r, 0);
1608
1609 /* set bo1 */
1610 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1611
1612 /* allocate UC bo2 for sDMA use */
1613 r = amdgpu_bo_alloc_and_map(device_handle,
1614 sdma_write_length, 4096,
1615 AMDGPU_GEM_DOMAIN_GTT,
1616 gtt_flags[loop2], &bo2,
1617 (void**)&bo2_cpu, &bo2_mc,
1618 &bo2_va_handle);
1619 CU_ASSERT_EQUAL(r, 0);
1620
1621 /* clear bo2 */
1622 memset((void*)bo2_cpu, 0, sdma_write_length);
1623
1624 resources[0] = bo1;
1625 resources[1] = bo2;
1626
				/* fill PM4: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify that the copy result matches the expected pattern */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop1++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

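	/*
	 * Each submission carries two IBs: a constant-engine (CE) IB that
	 * initializes and increments the CE counter, and a draw-engine (DE)
	 * IB that waits on it, so every submission also exercises CE/DE
	 * synchronization.
	 */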
	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);

	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

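	/*
	 * With wait_all == true amdgpu_cs_wait_fences() returns only once
	 * every fence has signaled; with wait_all == false it returns as
	 * soon as any one of them has.
	 */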
	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

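	/*
	 * Fork a child that clobbers its copy-on-write view of the command
	 * buffer and exits right away; the parent's copy, and therefore the
	 * stream actually submitted below, must be unaffected. The child is
	 * reaped by the wait(NULL) at the end of the test.
	 */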
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}

static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

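	/*
	 * The 8 KiB IB buffer is shared: PM4 commands are built at offset 0,
	 * the compute shader binary sits at CODE_OFFSET dwords, and the
	 * dword at DATA_OFFSET serves as the shader's output slot.
	 */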
	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
	 *                               SGPRS = 1
	 *                               PRIORITY = 0
	 *                               FLOAT_MODE = 192 (0xc0)
	 *                               PRIV = 0
	 *                               DX10_CLAMP = 1
	 *                               DEBUG_MODE = 0
	 *                               IEEE_MODE = 0
	 *                               BULKY = 0
	 *                               CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;

	/*
	 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
	 *                               USER_SGPR = 8
	 *                               TRAP_PRESENT = 0
	 *                               TGID_X_EN = 0
	 *                               TGID_Y_EN = 0
	 *                               TGID_Z_EN = 0
	 *                               TG_SIZE_EN = 0
	 *                               TIDIG_COMP_CNT = 0
	 *                               EXCP_EN_MSB = 0
	 *                               LDS_SIZE = 0
	 *                               EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
	 *                                  WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare second command with dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

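	/*
	 * Attach the first submission's fence (context 1) as a dependency of
	 * the second submission (context 0); the scheduler must hold back the
	 * WRITE_DATA until the compute shader has finished, which is what the
	 * ptr[DATA_OFFSET] check below verifies.
	 */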
	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	CU_ASSERT_NOT_EQUAL(ibs_request.dependencies, NULL);
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to wait for shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}

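/*
 * Build a compute shader that runs long enough to trip the kernel's job
 * timeout: the body of a memcpy shader is repeated 0x10000 times between
 * its original header and footer.
 */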
static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
{
	struct amdgpu_test_shader *shader;
	int i, loop = 0x10000;

	switch (family) {
	case AMDGPU_FAMILY_AI:
		shader = &memcpy_cs_hang_slow_ai;
		break;
	case AMDGPU_FAMILY_RV:
		shader = &memcpy_cs_hang_slow_rv;
		break;
	default:
		return -1;
	}

	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

	for (i = 0; i < loop; i++)
		memcpy(ptr + shader->header_length + shader->body_length * i,
		       shader->shader + shader->header_length,
		       shader->body_length * sizeof(uint32_t));

	memcpy(ptr + shader->header_length + shader->body_length * loop,
	       shader->shader + shader->header_length + shader->body_length,
	       shader->foot_length * sizeof(uint32_t));

	return 0;
}

static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, int cs_type)
{
	uint32_t shader_size;
	const uint32_t *shader;

	switch (cs_type) {
	case CS_BUFFERCLEAR:
		shader = bufferclear_cs_shader_gfx9;
		shader_size = sizeof(bufferclear_cs_shader_gfx9);
		break;
	case CS_BUFFERCOPY:
		shader = buffercopy_cs_shader_gfx9;
		shader_size = sizeof(buffercopy_cs_shader_gfx9);
		break;
	case CS_HANG:
		shader = memcpy_ps_hang;
		shader_size = sizeof(memcpy_ps_hang);
		break;
	default:
		return -1;
	}

	memcpy(ptr, shader, shader_size);
	return 0;
}

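/*
 * PKT3_SET_SH_REG takes offsets relative to the SH register base 0x2c00,
 * so 0x204 below addresses mmCOMPUTE_START_X and 0x218 addresses
 * mmCOMPUTE_TMPRING_SIZE.
 */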
static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
{
	int i = 0;

	/* Write context control and load shadowing register if necessary */
	if (ip_type == AMDGPU_HW_IP_GFX) {
		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
		ptr[i++] = 0x80000000;
		ptr[i++] = 0x80000000;
	}

	/* Issue commands to set default compute state. */
	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
	ptr[i++] = 0x204;
	i += 3;

	/* clear mmCOMPUTE_TMPRING_SIZE */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x218;
	ptr[i++] = 0;

	return i;
}

static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
{
	int i = 0;

	/* Issue commands to set cu mask used in current dispatch */
	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x216;
	ptr[i++] = 0xffffffff;
	ptr[i++] = 0xffffffff;
	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x219;
	ptr[i++] = 0xffffffff;
	ptr[i++] = 0xffffffff;

	return i;
}

static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
{
	int i, j;

	i = 0;

	/* Writes shader state to HW */
	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x20c;
	ptr[i++] = (shader_addr >> 8);
	ptr[i++] = (shader_addr >> 40);
	/* write sh regs */
	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
		/* - Gfx9ShRegBase */
		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
	}

	return i;
}

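/*
 * Dispatch the CS_BUFFERCLEAR shader to fill a 16 KiB VRAM buffer with the
 * dword pattern 0x22222222 (passed via SGPRs), then read the buffer back
 * through the CPU mapping and check every byte.
 */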
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader, &ptr_shader,
				    &mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify the memset result matches the expected pattern */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

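/*
 * Same flow as the memset test but with a source buffer: the shader copies
 * 16 KiB from bo_src to bo_dst. With 'hang' set, the CS_HANG shader is
 * loaded instead and the test expects amdgpu_cs_query_reset_state() to
 * report a reset rather than a completed copy.
 */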
static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
					uint32_t ip_type,
					uint32_t ring,
					int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t expired, hang_state, hangs;
	enum cs_type cs_type;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader, &ptr_shader,
				    &mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify the memcpy result matches the source pattern */
		i = 0;
		while (i < bo_dst_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_compute_dispatch_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no compute ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
	}
}

static void amdgpu_gfx_dispatch_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
	}
}

void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}

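/*
 * Slow-hang variant: 64 MiB buffers plus the repeated-body shader from
 * amdgpu_dispatch_load_cs_shader_hang_slow() keep the dispatch running past
 * the job timeout, after which the context should report
 * AMDGPU_CTX_UNKNOWN_RESET.
 */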
static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
						  uint32_t ip_type, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 0x4000000;
	int bo_shader_size = 0x400000;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	struct amdgpu_gpu_info gpu_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader, &ptr_shader,
				    &mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_dst_size);

	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/* Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10000;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}

static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
{
	struct amdgpu_test_shader *shader;
	int i, loop = 0x40000;

	switch (family) {
	case AMDGPU_FAMILY_AI:
	case AMDGPU_FAMILY_RV:
		shader = &memcpy_ps_hang_slow_ai;
		break;
	default:
		return -1;
	}

	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

	for (i = 0; i < loop; i++)
		memcpy(ptr + shader->header_length + shader->body_length * i,
		       shader->shader + shader->header_length,
		       shader->body_length * sizeof(uint32_t));

	memcpy(ptr + shader->header_length + shader->body_length * loop,
	       shader->shader + shader->header_length + shader->body_length,
	       shader->foot_length * sizeof(uint32_t));

	return 0;
}

static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
{
	int i;
	uint32_t shader_offset = 256;
	uint32_t mem_offset, patch_code_offset;
	uint32_t shader_size, patchinfo_code_size;
	const uint32_t *shader;
	const uint32_t *patchinfo_code;
	const uint32_t *patchcode_offset;

	switch (ps_type) {
	case PS_CONST:
		shader = ps_const_shader_gfx9;
		shader_size = sizeof(ps_const_shader_gfx9);
		patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
		patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
		patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
		break;
	case PS_TEX:
		shader = ps_tex_shader_gfx9;
		shader_size = sizeof(ps_tex_shader_gfx9);
		patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
		patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
		patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
		break;
	case PS_HANG:
		shader = memcpy_ps_hang;
		shader_size = sizeof(memcpy_ps_hang);

		memcpy(ptr, shader, shader_size);
		return 0;
	default:
		return -1;
	}

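	/*
	 * Ten copies of the shader are laid out at 256-byte strides, and
	 * each copy gets a different variant of the patch code applied at
	 * the recorded patch offset; amdgpu_draw_ps_write2hw() later points
	 * the hardware at copy number 9.
	 */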
	/* write main shader program */
	for (i = 0; i < 10; i++) {
		mem_offset = i * shader_offset;
		memcpy(ptr + mem_offset, shader, shader_size);
	}

	/* overwrite patch codes */
	for (i = 0; i < 10; i++) {
		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
		patch_code_offset = i * patchinfo_code_size;
		memcpy(ptr + mem_offset,
		       patchinfo_code + patch_code_offset,
		       patchinfo_code_size * sizeof(uint32_t));
	}

	return 0;
}

/* load RectPosTexFast_VS */
static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
{
	const uint32_t *shader;
	uint32_t shader_size;

	shader = vs_RectPosTexFast_shader_gfx9;
	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);

	memcpy(ptr, shader, shader_size);

	return 0;
}

static int amdgpu_draw_init(uint32_t *ptr)
{
	int i = 0;
	const uint32_t *preamblecache_ptr;
	uint32_t preamblecache_size;

	/* Write context control and load shadowing register if necessary */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	preamblecache_ptr = preamblecache_gfx9;
	preamblecache_size = sizeof(preamblecache_gfx9);

	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
	return i + preamblecache_size / sizeof(uint32_t);
}

static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr,
							 int hang_slow)
{
	int i = 0;

	/* setup color buffer */
	/* offset   reg
	   0xA318   CB_COLOR0_BASE
	   0xA319   CB_COLOR0_BASE_EXT
	   0xA31A   CB_COLOR0_ATTRIB2
	   0xA31B   CB_COLOR0_VIEW
	   0xA31C   CB_COLOR0_INFO
	   0xA31D   CB_COLOR0_ATTRIB
	   0xA31E   CB_COLOR0_DCC_CONTROL
	   0xA31F   CB_COLOR0_CMASK
	   0xA320   CB_COLOR0_CMASK_BASE_EXT
	   0xA321   CB_COLOR0_FMASK
	   0xA322   CB_COLOR0_FMASK_BASE_EXT
	   0xA323   CB_COLOR0_CLEAR_WORD0
	   0xA324   CB_COLOR0_CLEAR_WORD1
	   0xA325   CB_COLOR0_DCC_BASE
	   0xA326   CB_COLOR0_DCC_BASE_EXT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
	ptr[i++] = 0x318;
	ptr[i++] = dst_addr >> 8;
	ptr[i++] = dst_addr >> 40;
	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
	ptr[i++] = 0;
	ptr[i++] = 0x50438;
	ptr[i++] = 0x10140000;
	i += 9;

	/* mmCB_MRT0_EPITCH */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1e8;
	ptr[i++] = hang_slow ? 0x7ff : 0x1f;

	/* 0xA32B CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	/* mmDB_Z_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0xe;
	i += 2;

	return i;
}

static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 1;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	cached_cmd_ptr = cached_cmd_gfx9;
	cached_cmd_size = sizeof(cached_cmd_gfx9);

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	if (hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size / sizeof(uint32_t);

	return i;
}

static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	/* mmSPI_SHADER_PGM_RSRC3_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x46;
	ptr[i++] = 0xffff;

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	ptr[i++] = 0xc0081;

	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}

static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
				   int ps_type,
				   uint64_t shader_addr)
{
	int i, j;
	const uint32_t *sh_registers;
	const uint32_t *context_registers;
	uint32_t num_sh_reg, num_context_reg;

	if (ps_type == PS_CONST) {
		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	} else if (ps_type == PS_TEX) {
		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	}

	i = 0;

	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
	   0x2c08   SPI_SHADER_PGM_LO_PS
	   0x2c09   SPI_SHADER_PGM_HI_PS */
	shader_addr += 256 * 9;
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = 0x7;
	ptr[i++] = 0xffff;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	for (j = 0; j < num_sh_reg; j++) {
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = sh_registers[j * 2] - 0x2c00;
		ptr[i++] = sh_registers[j * 2 + 1];
	}

	for (j = 0; j < num_context_reg; j++) {
		if (context_registers[j * 2] != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = context_registers[j * 2] - 0xa000;
			ptr[i++] = context_registers[j * 2 + 1];
		}

		if (context_registers[j * 2] == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	return i;
}

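/*
 * Issue the draw itself: VGT_PRIMITIVE_TYPE 0x11 selects a rect list, and
 * DRAW_INDEX_AUTO emits three auto-generated vertices that the
 * RectPosTexFast vertex shader expands to cover the render target.
 */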
amdgpu_draw_draw(uint32_t * ptr)3115 static int amdgpu_draw_draw(uint32_t *ptr)
3116 {
3117 int i = 0;
3118
3119 /* mmIA_MULTI_VGT_PARAM */
3120 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3121 ptr[i++] = 0x40000258;
3122 ptr[i++] = 0xd00ff;
3123
3124 /* mmVGT_PRIMITIVE_TYPE */
3125 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3126 ptr[i++] = 0x10000242;
3127 ptr[i++] = 0x11;
3128
3129 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3130 ptr[i++] = 3;
3131 ptr[i++] = 2;
3132
3133 return i;
3134 }
3135
amdgpu_memset_draw(amdgpu_device_handle device_handle,amdgpu_bo_handle bo_shader_ps,amdgpu_bo_handle bo_shader_vs,uint64_t mc_address_shader_ps,uint64_t mc_address_shader_vs,uint32_t ring_id)3136 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3137 amdgpu_bo_handle bo_shader_ps,
3138 amdgpu_bo_handle bo_shader_vs,
3139 uint64_t mc_address_shader_ps,
3140 uint64_t mc_address_shader_vs,
3141 uint32_t ring_id)
3142 {
3143 amdgpu_context_handle context_handle;
3144 amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3145 volatile unsigned char *ptr_dst;
3146 uint32_t *ptr_cmd;
3147 uint64_t mc_address_dst, mc_address_cmd;
3148 amdgpu_va_handle va_dst, va_cmd;
3149 int i, r;
3150 int bo_dst_size = 16384;
3151 int bo_cmd_size = 4096;
3152 struct amdgpu_cs_request ibs_request = {0};
3153 struct amdgpu_cs_ib_info ib_info = {0};
3154 struct amdgpu_cs_fence fence_status = {0};
3155 uint32_t expired;
3156 amdgpu_bo_list_handle bo_list;
3157
3158 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3159 CU_ASSERT_EQUAL(r, 0);
3160
3161 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3162 AMDGPU_GEM_DOMAIN_GTT, 0,
3163 &bo_cmd, (void **)&ptr_cmd,
3164 &mc_address_cmd, &va_cmd);
3165 CU_ASSERT_EQUAL(r, 0);
3166 memset(ptr_cmd, 0, bo_cmd_size);
3167
3168 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3169 AMDGPU_GEM_DOMAIN_VRAM, 0,
3170 &bo_dst, (void **)&ptr_dst,
3171 &mc_address_dst, &va_dst);
3172 CU_ASSERT_EQUAL(r, 0);
3173
3174 i = 0;
3175 i += amdgpu_draw_init(ptr_cmd + i);
3176
3177 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3178
3179 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3180
3181 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3182
3183 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3184
3185 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3186 ptr_cmd[i++] = 0xc;
3187 ptr_cmd[i++] = 0x33333333;
3188 ptr_cmd[i++] = 0x33333333;
3189 ptr_cmd[i++] = 0x33333333;
3190 ptr_cmd[i++] = 0x33333333;
3191
3192 i += amdgpu_draw_draw(ptr_cmd + i);
3193
3194 while (i & 7)
3195 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3196
3197 resources[0] = bo_dst;
3198 resources[1] = bo_shader_ps;
3199 resources[2] = bo_shader_vs;
3200 resources[3] = bo_cmd;
3201 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3202 CU_ASSERT_EQUAL(r, 0);
3203
3204 ib_info.ib_mc_address = mc_address_cmd;
3205 ib_info.size = i;
3206 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3207 ibs_request.ring = ring_id;
3208 ibs_request.resources = bo_list;
3209 ibs_request.number_of_ibs = 1;
3210 ibs_request.ibs = &ib_info;
3211 ibs_request.fence_info.handle = NULL;
3212
3213 /* submit CS */
3214 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3215 CU_ASSERT_EQUAL(r, 0);
3216
3217 r = amdgpu_bo_list_destroy(bo_list);
3218 CU_ASSERT_EQUAL(r, 0);
3219
3220 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3221 fence_status.ip_instance = 0;
3222 fence_status.ring = ring_id;
3223 fence_status.context = context_handle;
3224 fence_status.fence = ibs_request.seq_no;
3225
3226 /* wait for IB accomplished */
3227 r = amdgpu_cs_query_fence_status(&fence_status,
3228 AMDGPU_TIMEOUT_INFINITE,
3229 0, &expired);
3230 CU_ASSERT_EQUAL(r, 0);
3231 CU_ASSERT_EQUAL(expired, true);
3232
3233 /* verify if memset test result meets with expected */
3234 i = 0;
3235 while(i < bo_dst_size) {
3236 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3237 }
3238
3239 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3240 CU_ASSERT_EQUAL(r, 0);
3241
3242 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3243 CU_ASSERT_EQUAL(r, 0);
3244
3245 r = amdgpu_cs_ctx_free(context_handle);
3246 CU_ASSERT_EQUAL(r, 0);
3247 }
3248
amdgpu_memset_draw_test(amdgpu_device_handle device_handle,uint32_t ring)3249 static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3250 uint32_t ring)
3251 {
3252 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3253 void *ptr_shader_ps;
3254 void *ptr_shader_vs;
3255 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3256 amdgpu_va_handle va_shader_ps, va_shader_vs;
3257 int r;
3258 int bo_shader_size = 4096;
3259
3260 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3261 AMDGPU_GEM_DOMAIN_VRAM, 0,
3262 &bo_shader_ps, &ptr_shader_ps,
3263 &mc_address_shader_ps, &va_shader_ps);
3264 CU_ASSERT_EQUAL(r, 0);
3265 memset(ptr_shader_ps, 0, bo_shader_size);
3266
3267 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3268 AMDGPU_GEM_DOMAIN_VRAM, 0,
3269 &bo_shader_vs, &ptr_shader_vs,
3270 &mc_address_shader_vs, &va_shader_vs);
3271 CU_ASSERT_EQUAL(r, 0);
3272 memset(ptr_shader_vs, 0, bo_shader_size);
3273
3274 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3275 CU_ASSERT_EQUAL(r, 0);
3276
3277 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3278 CU_ASSERT_EQUAL(r, 0);
3279
3280 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3281 mc_address_shader_ps, mc_address_shader_vs, ring);
3282
3283 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3284 CU_ASSERT_EQUAL(r, 0);
3285
3286 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3287 CU_ASSERT_EQUAL(r, 0);
3288 }
3289
amdgpu_memcpy_draw(amdgpu_device_handle device_handle,amdgpu_bo_handle bo_shader_ps,amdgpu_bo_handle bo_shader_vs,uint64_t mc_address_shader_ps,uint64_t mc_address_shader_vs,uint32_t ring,int hang)3290 static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3291 amdgpu_bo_handle bo_shader_ps,
3292 amdgpu_bo_handle bo_shader_vs,
3293 uint64_t mc_address_shader_ps,
3294 uint64_t mc_address_shader_vs,
3295 uint32_t ring, int hang)
3296 {
3297 amdgpu_context_handle context_handle;
3298 amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3299 volatile unsigned char *ptr_dst;
3300 unsigned char *ptr_src;
3301 uint32_t *ptr_cmd;
3302 uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3303 amdgpu_va_handle va_dst, va_src, va_cmd;
3304 int i, r;
3305 int bo_size = 16384;
3306 int bo_cmd_size = 4096;
3307 struct amdgpu_cs_request ibs_request = {0};
3308 struct amdgpu_cs_ib_info ib_info= {0};
3309 uint32_t hang_state, hangs;
3310 uint32_t expired;
3311 amdgpu_bo_list_handle bo_list;
3312 struct amdgpu_cs_fence fence_status = {0};
3313
3314 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3315 CU_ASSERT_EQUAL(r, 0);
3316
3317 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3318 AMDGPU_GEM_DOMAIN_GTT, 0,
3319 &bo_cmd, (void **)&ptr_cmd,
3320 &mc_address_cmd, &va_cmd);
3321 CU_ASSERT_EQUAL(r, 0);
3322 memset(ptr_cmd, 0, bo_cmd_size);
3323
3324 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3325 AMDGPU_GEM_DOMAIN_VRAM, 0,
3326 &bo_src, (void **)&ptr_src,
3327 &mc_address_src, &va_src);
3328 CU_ASSERT_EQUAL(r, 0);
3329
3330 r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3331 AMDGPU_GEM_DOMAIN_VRAM, 0,
3332 &bo_dst, (void **)&ptr_dst,
3333 &mc_address_dst, &va_dst);
3334 CU_ASSERT_EQUAL(r, 0);
3335
3336 memset(ptr_src, 0x55, bo_size);
3337
3338 i = 0;
3339 i += amdgpu_draw_init(ptr_cmd + i);
3340
3341 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3342
3343 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3344
3345 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);
3346
3347 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);
3348
3349 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3350 ptr_cmd[i++] = 0xc;
3351 ptr_cmd[i++] = mc_address_src >> 8;
3352 ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3353 ptr_cmd[i++] = 0x7c01f;
3354 ptr_cmd[i++] = 0x90500fac;
3355 ptr_cmd[i++] = 0x3e000;
3356 i += 3;
3357
3358 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3359 ptr_cmd[i++] = 0x14;
3360 ptr_cmd[i++] = 0x92;
3361 i += 3;
3362
3363 ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3364 ptr_cmd[i++] = 0x191;
3365 ptr_cmd[i++] = 0;
3366
3367 i += amdgpu_draw_draw(ptr_cmd + i);
3368
3369 while (i & 7)
3370 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3371
3372 resources[0] = bo_dst;
3373 resources[1] = bo_src;
3374 resources[2] = bo_shader_ps;
3375 resources[3] = bo_shader_vs;
3376 resources[4] = bo_cmd;
3377 r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3378 CU_ASSERT_EQUAL(r, 0);
3379
3380 ib_info.ib_mc_address = mc_address_cmd;
3381 ib_info.size = i;
3382 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3383 ibs_request.ring = ring;
3384 ibs_request.resources = bo_list;
3385 ibs_request.number_of_ibs = 1;
3386 ibs_request.ibs = &ib_info;
3387 ibs_request.fence_info.handle = NULL;
3388 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3389 CU_ASSERT_EQUAL(r, 0);
3390
3391 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3392 fence_status.ip_instance = 0;
3393 fence_status.ring = ring;
3394 fence_status.context = context_handle;
3395 fence_status.fence = ibs_request.seq_no;
3396
3397 /* wait for IB accomplished */
3398 r = amdgpu_cs_query_fence_status(&fence_status,
3399 AMDGPU_TIMEOUT_INFINITE,
3400 0, &expired);
3401 if (!hang) {
3402 CU_ASSERT_EQUAL(r, 0);
3403 CU_ASSERT_EQUAL(expired, true);
3404
3405 /* verify if memcpy test result meets with expected */
3406 i = 0;
3407 while(i < bo_size) {
3408 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
3409 i++;
3410 }
3411 } else {
3412 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3413 CU_ASSERT_EQUAL(r, 0);
3414 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3415 }
3416
3417 r = amdgpu_bo_list_destroy(bo_list);
3418 CU_ASSERT_EQUAL(r, 0);
3419
3420 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
3421 CU_ASSERT_EQUAL(r, 0);
3422 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
3423 CU_ASSERT_EQUAL(r, 0);
3424
3425 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3426 CU_ASSERT_EQUAL(r, 0);
3427
3428 r = amdgpu_cs_ctx_free(context_handle);
3429 CU_ASSERT_EQUAL(r, 0);
3430 }
3431
amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle,uint32_t ring,int hang)3432 void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
3433 int hang)
3434 {
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

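/* Run the memset and memcpy draw tests on every available GFX ring. */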
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no graphics ring\n");
		return;
	}

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

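/* Slow-hang variant: copies a large (64 MiB) source texture with a PS
 * loaded by amdgpu_draw_load_ps_shader_hang_slow, intended to keep the draw
 * running long enough to exceed the kernel's job timeout and force a reset.
 */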
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

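	/* Build the same draw command stream as amdgpu_memcpy_draw, but with
	 * descriptor words sized for the large source texture.
	 */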
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
						    mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

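	/* Source texture T# descriptor; the size word (0x1ffc7ff) and pitch
	 * word (0xffe000) differ from the small-copy test and are assumed to
	 * describe the 64 MiB image.
	 */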
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete; the result is not asserted here since
	 * the submission is expected to hang and be reset */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

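/* Trigger a GPU reset through the amdgpu_gpu_recover debugfs file, verify
 * that the context reports the reset, then re-run the dispatch tests to
 * confirm the GPU is functional again.
 */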
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

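	/* reading the amdgpu_gpu_recover debugfs entry forces a GPU reset */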
	snprintf(debugfs_path, sizeof(debugfs_path),
		 "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* make sure the GPU still works after the reset */
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}
