/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;
static uint32_t chip_id;
static uint32_t chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					 (((sub_op) & 0xFF) << 8) |	\
					 (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRTIE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0
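
/*
 * Worked example (illustrative only, not referenced by the tests): the
 * header dword for the linear SDMA copy used further below is
 *	SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 *	== ((0 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | (1 & 0xFF)
 *	== 0x00000001
 * and the DW-granularity constant fill used further below is
 *	SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, SDMA_CONSTANT_FILL_EXTRA_SIZE(2))
 *	== ((0x8000 & 0xFFFF) << 16) | 11
 *	== 0x8000000B
 */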

#define SDMA_OPCODE_ATOMIC  10
#	define SDMA_ATOMIC_LOOP(x)	((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define SDMA_ATOMIC_TMZ(x)	((x) << 2)
	/* 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#	define SDMA_ATOMIC_OPCODE(x)	((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
	 * same as Packet 3
	 */

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0 0
#define PACKET_TYPE1 1
#define PACKET_TYPE2 2
#define PACKET_TYPE3 3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2		0x80000000
#define PACKET2_PAD_SHIFT	0
#define PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

/* Packet 3 types */
#define PACKET3_NOP	0x10
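
/*
 * Worked example (illustrative only): amdgpu_command_submission_compute_nop()
 * below pads a 16-dword IB with
 *	PACKET3(PACKET3_NOP, 14)
 *	== (3 << 30) | ((0x10 & 0xFF) << 8) | ((14 & 0x3FFF) << 16)
 *	== 0xC00E1000
 * PM4 type-3 count fields hold one less than the number of payload dwords,
 * so a count of 14 covers the 15 dwords that follow the header.
 */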

#define PACKET3_WRITE_DATA	0x37
#define	WRITE_DATA_DST_SEL(x)		((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#define	WR_ONE_ADDR			(1 << 16)
#define	WR_CONFIRM			(1 << 20)
#define	WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
#define	WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */

#define PACKET3_ATOMIC_MEM	0x1E
#define	TC_OP_ATOMIC_CMPSWAP_RTN_32	0x00000008
#define	ATOMIC_MEM_COMMAND(x)		((x) << 8)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#define	ATOMIC_MEM_CACHEPOLICAY(x)	((x) << 25)
	/* 0 - lru.
	 * 1 - stream.
	 */
#define	ATOMIC_MEM_ENGINESEL(x)		((x) << 30)
	/* 0 - micro_engine.
	 */
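
/*
 * Worked example (illustrative only): the control dword for the secure
 * GFX atomic test further below is
 *	TC_OP_ATOMIC_CMPSWAP_RTN_32 | ATOMIC_MEM_COMMAND(1) |
 *	ATOMIC_MEM_CACHEPOLICAY(0) | ATOMIC_MEM_ENGINESEL(0)
 *	== 0x8 | (1 << 8) == 0x00000108
 */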

#define PACKET3_DMA_DATA	0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)		((x) << 0)
	/* 0 - ME
	 * 1 - PFP
	 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE		(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)		((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#	define PACKET3_DMA_DATA_DST_VOLATILE		(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)		((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_CP_SYNC		(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC			(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#	define PACKET3_DMA_DATA_CMD_SAS		(1 << 26)
	/* 0 - memory
	 * 1 - register
	 */
#	define PACKET3_DMA_DATA_CMD_DAS		(1 << 27)
	/* 0 - memory
	 * 1 - register
	 */
#	define PACKET3_DMA_DATA_CMD_SAIC		(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC		(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT		(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						 (((b) & 0x1) << 26) |	\
						 (((t) & 0x1) << 23) |	\
						 (((s) & 0x1) << 22) |	\
						 (((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI		3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI			0xf
#define GFX_COMPUTE_NOP_SI		0x80000000
#define PACKET3_DMA_DATA_SI		0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
	/* 0 - ME
	 * 1 - PFP
	 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)
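
/*
 * Worked example (illustrative only): the SI SDMA NOP used by
 * amdgpu_semaphore_test() below is
 *	SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0) == (0xf << 28) == 0xF0000000
 */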


#define PKT3_CONTEXT_CONTROL			0x28
#define	CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define	CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define	CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define	PACKET3_SET_SH_REG_START		0x00002c00

#define PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
#define PACKET3_SET_CONTEXT_REG			0x69
#define PACKET3_SET_UCONFIG_REG			0x79
#define PACKET3_DRAW_INDEX_AUTO			0x2D

/* gfx 8 */
#define mmCOMPUTE_PGM_LO			0x2e0c
#define mmCOMPUTE_PGM_RSRC1			0x2e12
#define mmCOMPUTE_TMPRING_SIZE			0x2e18
#define mmCOMPUTE_USER_DATA_0			0x2e40
#define mmCOMPUTE_USER_DATA_1			0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS		0x2e15
#define mmCOMPUTE_NUM_THREAD_X			0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
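
/* e.g. SWAP_32(0x11223344) == 0x44332211: byte-reverses one dword of the
 * hand-assembled shader binary below. */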


/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1,      0x000C0041 },
	{0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2,      0x00000090 },
	{0x2e07, 0x00000040}, //{ mmCOMPUTE_NUM_THREAD_X,   0x00000040 },
	{0x2e08, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Y,   0x00000001 },
	{0x2e09, 0x00000001}, //{ mmCOMPUTE_NUM_THREAD_Z,   0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
	{0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
	{0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
	{0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
	{0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
	{0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
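
/*
 * Minimal usage sketch (illustrative only; the real callers are the tests
 * below):
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *	int r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					    AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					    &bo, &cpu, &mc, &va);
 *	if (!r)
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 */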


CU_BOOL suite_basic_tests_enable(void)
{
	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the GFX-engine basic tests on ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (GFX)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (Multi-Fence)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Sync dependency Test",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);
	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* second IB shares the same BO at a 16-byte offset */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run all four combinations of the two GTT mapping flags */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill the PM4 stream: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the copy: bo2 should now hold bo1's pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}


static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engines */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different contexts */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request; this helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet from the caller into the ring */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,
							  unsigned ip_type,
							  bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill the PM4 stream: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify the test result */
			i = 0;
			if (!secure) {
				/* non-secure: the CPU can read back what was written */
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* 32-bit atomic compare-and-swap with return:
				 * command 1 - loop_until_compare_satisfied,
				 * cache policy 0 - lru,
				 * engine_sel 0 - micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
					    ATOMIC_MEM_COMMAND(1) |
					    ATOMIC_MEM_CACHEPOLICAY(0) |
					    ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* save bo_cpu[0] so the change can be detected */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* 32-bit atomic compare-and-swap with return:
				 * loop 1 - loop_until_compare_satisfied,
				 * TMZ 1, cache policy 0 - lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/*
				 * DMA's atomic behavior differs from GFX: while the
				 * compare value keeps mismatching the destination,
				 * GFX loops again until a GPU timeout (system hang),
				 * whereas DMA loops until a timer expires and then
				 * sends an interrupt, so the test cannot rely on the
				 * interrupt mechanism. We verify by side effect
				 * instead: when the compare value matches the
				 * destination, the source value is swapped into the
				 * destination buffer; otherwise the destination is
				 * left unchanged. So if bo_cpu[0] was overwritten,
				 * the swap happened and the test passes.
				 */
				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* save again; dest_data should now be 0x12345678 */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x87654321;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* bo_cpu[0] should be unchanged (still 0x12345678),
				 * otherwise the test failed */
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1623
amdgpu_command_submission_const_fill_helper(unsigned ip_type)1624 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1625 {
1626 const int sdma_write_length = 1024 * 1024;
1627 const int pm4_dw = 256;
1628 amdgpu_context_handle context_handle;
1629 amdgpu_bo_handle bo;
1630 amdgpu_bo_handle *resources;
1631 uint32_t *pm4;
1632 struct amdgpu_cs_ib_info *ib_info;
1633 struct amdgpu_cs_request *ibs_request;
1634 uint64_t bo_mc;
1635 volatile uint32_t *bo_cpu;
1636 int i, j, r, loop, ring_id;
1637 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1638 amdgpu_va_handle va_handle;
1639 struct drm_amdgpu_info_hw_ip hw_ip_info;
1640
1641 pm4 = calloc(pm4_dw, sizeof(*pm4));
1642 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1643
1644 ib_info = calloc(1, sizeof(*ib_info));
1645 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1646
1647 ibs_request = calloc(1, sizeof(*ibs_request));
1648 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1649
1650 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1651 CU_ASSERT_EQUAL(r, 0);
1652
1653 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1654 CU_ASSERT_EQUAL(r, 0);
1655
1656 /* prepare resource */
1657 resources = calloc(1, sizeof(amdgpu_bo_handle));
1658 CU_ASSERT_NOT_EQUAL(resources, NULL);
1659
1660 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1661 loop = 0;
1662 while(loop < 2) {
1663 /* allocate UC bo for sDMA use */
1664 r = amdgpu_bo_alloc_and_map(device_handle,
1665 sdma_write_length, 4096,
1666 AMDGPU_GEM_DOMAIN_GTT,
1667 gtt_flags[loop], &bo, (void**)&bo_cpu,
1668 &bo_mc, &va_handle);
1669 CU_ASSERT_EQUAL(r, 0);
1670
1671 /* clear bo */
1672 memset((void*)bo_cpu, 0, sdma_write_length);
1673
1674 resources[0] = bo;
1675
1676 /* fulfill PM4: test DMA const fill */
1677 i = j = 0;
1678 if (ip_type == AMDGPU_HW_IP_DMA) {
1679 if (family_id == AMDGPU_FAMILY_SI) {
1680 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1681 0, 0, 0,
1682 sdma_write_length / 4);
1683 pm4[i++] = 0xfffffffc & bo_mc;
1684 pm4[i++] = 0xdeadbeaf;
1685 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1686 } else {
1687 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1688 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1689 pm4[i++] = 0xffffffff & bo_mc;
1690 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1691 pm4[i++] = 0xdeadbeaf;
1692 if (family_id >= AMDGPU_FAMILY_AI)
1693 pm4[i++] = sdma_write_length - 1;
1694 else
1695 pm4[i++] = sdma_write_length;
1696 }
1697 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1698 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1699 if (family_id == AMDGPU_FAMILY_SI) {
1700 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1701 pm4[i++] = 0xdeadbeaf;
1702 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1703 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1704 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1705 PACKET3_DMA_DATA_SI_CP_SYNC;
1706 pm4[i++] = 0xffffffff & bo_mc;
1707 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1708 pm4[i++] = sdma_write_length;
1709 } else {
1710 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1711 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1712 PACKET3_DMA_DATA_DST_SEL(0) |
1713 PACKET3_DMA_DATA_SRC_SEL(2) |
1714 PACKET3_DMA_DATA_CP_SYNC;
1715 pm4[i++] = 0xdeadbeaf;
1716 pm4[i++] = 0;
1717 pm4[i++] = 0xfffffffc & bo_mc;
1718 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1719 pm4[i++] = sdma_write_length;
1720 }
1721 }
1722
1723 amdgpu_test_exec_cs_helper(context_handle,
1724 ip_type, ring_id,
1725 i, pm4,
1726 1, resources,
1727 ib_info, ibs_request);
1728
1729 /* verify the SDMA test result matches the expected fill pattern */
1730 i = 0;
1731 while(i < (sdma_write_length / 4)) {
1732 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1733 }
1734
1735 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1736 sdma_write_length);
1737 CU_ASSERT_EQUAL(r, 0);
1738 loop++;
1739 }
1740 }
1741 /* clean resources */
1742 free(resources);
1743 free(ibs_request);
1744 free(ib_info);
1745 free(pm4);
1746
1747 /* end of test */
1748 r = amdgpu_cs_ctx_free(context_handle);
1749 CU_ASSERT_EQUAL(r, 0);
1750 }
1751
1752 static void amdgpu_command_submission_sdma_const_fill(void)
1753 {
1754 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1755 }
1756
1757 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1758 {
1759 const int sdma_write_length = 1024;
1760 const int pm4_dw = 256;
1761 amdgpu_context_handle context_handle;
1762 amdgpu_bo_handle bo1, bo2;
1763 amdgpu_bo_handle *resources;
1764 uint32_t *pm4;
1765 struct amdgpu_cs_ib_info *ib_info;
1766 struct amdgpu_cs_request *ibs_request;
1767 uint64_t bo1_mc, bo2_mc;
1768 volatile unsigned char *bo1_cpu, *bo2_cpu;
1769 int i, j, r, loop1, loop2, ring_id;
1770 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1771 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1772 struct drm_amdgpu_info_hw_ip hw_ip_info;
1773
1774 pm4 = calloc(pm4_dw, sizeof(*pm4));
1775 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1776
1777 ib_info = calloc(1, sizeof(*ib_info));
1778 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1779
1780 ibs_request = calloc(1, sizeof(*ibs_request));
1781 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1782
1783 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1784 CU_ASSERT_EQUAL(r, 0);
1785
1786 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1787 CU_ASSERT_EQUAL(r, 0);
1788
1789 /* prepare resource */
1790 resources = calloc(2, sizeof(amdgpu_bo_handle));
1791 CU_ASSERT_NOT_EQUAL(resources, NULL);
1792
1793 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1794 /* test all four combinations of the two GTT mapping flags */
1795 for (loop1 = 0; loop1 < 2; loop1++) {
1796 for (loop2 = 0; loop2 < 2; loop2++) {
1798 /* allocate UC bo1 for sDMA use */
1799 r = amdgpu_bo_alloc_and_map(device_handle,
1800 sdma_write_length, 4096,
1801 AMDGPU_GEM_DOMAIN_GTT,
1802 gtt_flags[loop1], &bo1,
1803 (void**)&bo1_cpu, &bo1_mc,
1804 &bo1_va_handle);
1805 CU_ASSERT_EQUAL(r, 0);
1806
1807 /* set bo1 */
1808 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1809
1810 /* allocate UC bo2 for sDMA use */
1811 r = amdgpu_bo_alloc_and_map(device_handle,
1812 sdma_write_length, 4096,
1813 AMDGPU_GEM_DOMAIN_GTT,
1814 gtt_flags[loop2], &bo2,
1815 (void**)&bo2_cpu, &bo2_mc,
1816 &bo2_va_handle);
1817 CU_ASSERT_EQUAL(r, 0);
1818
1819 /* clear bo2 */
1820 memset((void*)bo2_cpu, 0, sdma_write_length);
1821
1822 resources[0] = bo1;
1823 resources[1] = bo2;
1824
1825 /* fill the PM4 buffer: test DMA copy linear */
1826 i = j = 0;
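/* Copy variant of the same packets: SDMA COPY/LINEAR carries the
 * byte count (minus one on AI and newer) plus source and destination
 * VAs; GFX/COMPUTE use CP DMA_DATA with SRC_SEL(0)/DST_SEL(0) so
 * both ends are memory addresses.
 */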
1827 if (ip_type == AMDGPU_HW_IP_DMA) {
1828 if (family_id == AMDGPU_FAMILY_SI) {
1829 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1830 0, 0, 0,
1831 sdma_write_length);
1832 pm4[i++] = 0xffffffff & bo2_mc;
1833 pm4[i++] = 0xffffffff & bo1_mc;
1834 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1835 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1836 } else {
1837 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1838 SDMA_COPY_SUB_OPCODE_LINEAR,
1839 0);
1840 if (family_id >= AMDGPU_FAMILY_AI)
1841 pm4[i++] = sdma_write_length - 1;
1842 else
1843 pm4[i++] = sdma_write_length;
1844 pm4[i++] = 0;
1845 pm4[i++] = 0xffffffff & bo1_mc;
1846 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1847 pm4[i++] = 0xffffffff & bo2_mc;
1848 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1849 }
1850 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1851 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1852 if (family_id == AMDGPU_FAMILY_SI) {
1853 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1854 pm4[i++] = 0xfffffffc & bo1_mc;
1855 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1856 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1857 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1858 PACKET3_DMA_DATA_SI_CP_SYNC |
1859 (0xffff00000000 & bo1_mc) >> 32;
1860 pm4[i++] = 0xfffffffc & bo2_mc;
1861 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1862 pm4[i++] = sdma_write_length;
1863 } else {
1864 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1865 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1866 PACKET3_DMA_DATA_DST_SEL(0) |
1867 PACKET3_DMA_DATA_SRC_SEL(0) |
1868 PACKET3_DMA_DATA_CP_SYNC;
1869 pm4[i++] = 0xfffffffc & bo1_mc;
1870 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1871 pm4[i++] = 0xfffffffc & bo2_mc;
1872 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1873 pm4[i++] = sdma_write_length;
1874 }
1875 }
1876
1877 amdgpu_test_exec_cs_helper(context_handle,
1878 ip_type, ring_id,
1879 i, pm4,
1880 2, resources,
1881 ib_info, ibs_request);
1882
1883 /* verify the SDMA test result matches the source pattern */
1884 i = 0;
1885 while(i < sdma_write_length) {
1886 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1887 }
1888 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1889 sdma_write_length);
1890 CU_ASSERT_EQUAL(r, 0);
1891 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1892 sdma_write_length);
1893 CU_ASSERT_EQUAL(r, 0);
1895 }
1897 }
1898 }
1899 /* clean resources */
1900 free(resources);
1901 free(ibs_request);
1902 free(ib_info);
1903 free(pm4);
1904
1905 /* end of test */
1906 r = amdgpu_cs_ctx_free(context_handle);
1907 CU_ASSERT_EQUAL(r, 0);
1908 }
1909
1910 static void amdgpu_command_submission_sdma_copy_linear(void)
1911 {
1912 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1913 }
1914
1915 static void amdgpu_command_submission_sdma(void)
1916 {
1917 amdgpu_command_submission_sdma_write_linear();
1918 amdgpu_command_submission_sdma_const_fill();
1919 amdgpu_command_submission_sdma_copy_linear();
1920 }
1921
1922 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1923 {
1924 amdgpu_context_handle context_handle;
1925 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1926 void *ib_result_cpu, *ib_result_ce_cpu;
1927 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1928 struct amdgpu_cs_request ibs_request[2] = {0};
1929 struct amdgpu_cs_ib_info ib_info[2];
1930 struct amdgpu_cs_fence fence_status[2] = {0};
1931 uint32_t *ptr;
1932 uint32_t expired;
1933 amdgpu_bo_list_handle bo_list;
1934 amdgpu_va_handle va_handle, va_handle_ce;
1935 int r;
1936 int i = 0, ib_cs_num = 2;
1937
1938 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1939 CU_ASSERT_EQUAL(r, 0);
1940
1941 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1942 AMDGPU_GEM_DOMAIN_GTT, 0,
1943 &ib_result_handle, &ib_result_cpu,
1944 &ib_result_mc_address, &va_handle);
1945 CU_ASSERT_EQUAL(r, 0);
1946
1947 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1948 AMDGPU_GEM_DOMAIN_GTT, 0,
1949 &ib_result_ce_handle, &ib_result_ce_cpu,
1950 &ib_result_ce_mc_address, &va_handle_ce);
1951 CU_ASSERT_EQUAL(r, 0);
1952
1953 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1954 ib_result_ce_handle, &bo_list);
1955 CU_ASSERT_EQUAL(r, 0);
1956
1957 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1958
1959 /* IT_SET_CE_DE_COUNTERS */
1960 ptr = ib_result_ce_cpu;
1961 if (family_id != AMDGPU_FAMILY_SI) {
1962 ptr[i++] = 0xc0008900;
1963 ptr[i++] = 0;
1964 }
1965 ptr[i++] = 0xc0008400;
1966 ptr[i++] = 1;
1967 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1968 ib_info[0].size = i;
1969 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1970
1971 /* IT_WAIT_ON_CE_COUNTER */
1972 ptr = ib_result_cpu;
1973 ptr[0] = 0xc0008600;
1974 ptr[1] = 0x00000001;
1975 ib_info[1].ib_mc_address = ib_result_mc_address;
1976 ib_info[1].size = 2;
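/* The pair of IBs exercises CE/DE synchronization: the constant
 * engine IB (AMDGPU_IB_FLAG_CE) bumps the CE counter and the DE IB
 * waits on it, so the submission only completes once both engines
 * have run their halves.
 */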
1977
1978 for (i = 0; i < ib_cs_num; i++) {
1979 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1980 ibs_request[i].number_of_ibs = 2;
1981 ibs_request[i].ibs = ib_info;
1982 ibs_request[i].resources = bo_list;
1983 ibs_request[i].fence_info.handle = NULL;
1984 }
1985
1986 r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1987
1988 CU_ASSERT_EQUAL(r, 0);
1989
1990 for (i = 0; i < ib_cs_num; i++) {
1991 fence_status[i].context = context_handle;
1992 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1993 fence_status[i].fence = ibs_request[i].seq_no;
1994 }
1995
1996 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1997 AMDGPU_TIMEOUT_INFINITE,
1998 &expired, NULL);
1999 CU_ASSERT_EQUAL(r, 0);
2000
2001 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2002 ib_result_mc_address, 4096);
2003 CU_ASSERT_EQUAL(r, 0);
2004
2005 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2006 ib_result_ce_mc_address, 4096);
2007 CU_ASSERT_EQUAL(r, 0);
2008
2009 r = amdgpu_bo_list_destroy(bo_list);
2010 CU_ASSERT_EQUAL(r, 0);
2011
2012 r = amdgpu_cs_ctx_free(context_handle);
2013 CU_ASSERT_EQUAL(r, 0);
2014 }
2015
2016 static void amdgpu_command_submission_multi_fence(void)
2017 {
2018 amdgpu_command_submission_multi_fence_wait_all(true);
2019 amdgpu_command_submission_multi_fence_wait_all(false);
2020 }
2021
2022 static void amdgpu_userptr_test(void)
2023 {
2024 int i, r, j;
2025 uint32_t *pm4 = NULL;
2026 uint64_t bo_mc;
2027 void *ptr = NULL;
2028 int pm4_dw = 256;
2029 int sdma_write_length = 4;
2030 amdgpu_bo_handle handle;
2031 amdgpu_context_handle context_handle;
2032 struct amdgpu_cs_ib_info *ib_info;
2033 struct amdgpu_cs_request *ibs_request;
2034 amdgpu_bo_handle buf_handle;
2035 amdgpu_va_handle va_handle;
2036
2037 pm4 = calloc(pm4_dw, sizeof(*pm4));
2038 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2039
2040 ib_info = calloc(1, sizeof(*ib_info));
2041 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2042
2043 ibs_request = calloc(1, sizeof(*ibs_request));
2044 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2045
2046 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2047 CU_ASSERT_EQUAL(r, 0);
2048
2049 r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2050 CU_ASSERT_EQUAL(r, 0);
2051 memset(ptr, 0, BUFFER_SIZE);
2052
2053 r = amdgpu_create_bo_from_user_mem(device_handle,
2054 ptr, BUFFER_SIZE, &buf_handle);
2055 CU_ASSERT_EQUAL(r, 0);
2056
2057 r = amdgpu_va_range_alloc(device_handle,
2058 amdgpu_gpu_va_range_general,
2059 BUFFER_SIZE, 1, 0, &bo_mc,
2060 &va_handle, 0);
2061 CU_ASSERT_EQUAL(r, 0);
2062
2063 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2064 CU_ASSERT_EQUAL(r, 0);
2065
2066 handle = buf_handle;
2067
2068 j = i = 0;
2069
2070 if (family_id == AMDGPU_FAMILY_SI)
2071 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2072 sdma_write_length);
2073 else
2074 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2075 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2076 pm4[i++] = 0xffffffff & bo_mc;
2077 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2078 if (family_id >= AMDGPU_FAMILY_AI)
2079 pm4[i++] = sdma_write_length - 1;
2080 else if (family_id != AMDGPU_FAMILY_SI)
2081 pm4[i++] = sdma_write_length;
2082
2083 while (j++ < sdma_write_length)
2084 pm4[i++] = 0xdeadbeaf;
2085
2086 if (!fork()) {
2087 pm4[0] = 0x0;
2088 exit(0);
2089 }
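/* The fork() is presumably here to exercise copy-on-write: the
 * child's store to pm4[0] forces its pages to be copied, and must
 * not disturb the parent's pending userptr submission. The child is
 * reaped by the wait(NULL) at the end of the test.
 */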
2090
2091 amdgpu_test_exec_cs_helper(context_handle,
2092 AMDGPU_HW_IP_DMA, 0,
2093 i, pm4,
2094 1, &handle,
2095 ib_info, ibs_request);
2096 i = 0;
2097 while (i < sdma_write_length) {
2098 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2099 }
2100 free(ibs_request);
2101 free(ib_info);
2102 free(pm4);
2103
2104 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2105 CU_ASSERT_EQUAL(r, 0);
2106 r = amdgpu_va_range_free(va_handle);
2107 CU_ASSERT_EQUAL(r, 0);
2108 r = amdgpu_bo_free(buf_handle);
2109 CU_ASSERT_EQUAL(r, 0);
2110 free(ptr);
2111
2112 r = amdgpu_cs_ctx_free(context_handle);
2113 CU_ASSERT_EQUAL(r, 0);
2114
2115 wait(NULL);
2116 }
2117
2118 static void amdgpu_sync_dependency_test(void)
2119 {
2120 amdgpu_context_handle context_handle[2];
2121 amdgpu_bo_handle ib_result_handle;
2122 void *ib_result_cpu;
2123 uint64_t ib_result_mc_address;
2124 struct amdgpu_cs_request ibs_request;
2125 struct amdgpu_cs_ib_info ib_info;
2126 struct amdgpu_cs_fence fence_status;
2127 uint32_t expired;
2128 int i, j, r;
2129 amdgpu_bo_list_handle bo_list;
2130 amdgpu_va_handle va_handle;
2131 static uint32_t *ptr;
2132 uint64_t seq_no;
2133
2134 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2135 CU_ASSERT_EQUAL(r, 0);
2136 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2137 CU_ASSERT_EQUAL(r, 0);
2138
2139 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2140 AMDGPU_GEM_DOMAIN_GTT, 0,
2141 &ib_result_handle, &ib_result_cpu,
2142 &ib_result_mc_address, &va_handle);
2143 CU_ASSERT_EQUAL(r, 0);
2144
2145 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2146 &bo_list);
2147 CU_ASSERT_EQUAL(r, 0);
2148
2149 ptr = ib_result_cpu;
2150 i = 0;
2151
2152 memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
2153
2154 /* Dispatch minimal init config and verify it's executed */
2155 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2156 ptr[i++] = 0x80000000;
2157 ptr[i++] = 0x80000000;
2158
2159 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2160 ptr[i++] = 0x80000000;
2161
2162
2163 /* Program compute regs */
2164 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2165 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2166 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2167 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
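/* COMPUTE_PGM_LO/HI take the shader VA right-shifted by 8 and 40:
 * the code must be 256-byte aligned and the two registers together
 * hold address bits [47:8].
 */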
2168
2169
2170 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2171 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2172 /*
2173 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2174 SGPRS = 1
2175 PRIORITY = 0
2176 FLOAT_MODE = 192 (0xc0)
2177 PRIV = 0
2178 DX10_CLAMP = 1
2179 DEBUG_MODE = 0
2180 IEEE_MODE = 0
2181 BULKY = 0
2182 CDBG_USER = 0
2183 *
2184 */
2185 ptr[i++] = 0x002c0040;
2186
2187
2188 /*
2189 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2190 USER_SGPR = 8
2191 TRAP_PRESENT = 0
2192 TGID_X_EN = 0
2193 TGID_Y_EN = 0
2194 TGID_Z_EN = 0
2195 TG_SIZE_EN = 0
2196 TIDIG_COMP_CNT = 0
2197 EXCP_EN_MSB = 0
2198 LDS_SIZE = 0
2199 EXCP_EN = 0
2200 *
2201 */
2202 ptr[i++] = 0x00000010;
2203
2204
2205 /*
2206 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2207 WAVESIZE = 0
2208 *
2209 */
2210 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2211 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2212 ptr[i++] = 0x00000100;
2213
2214 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2215 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2216 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2217 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2218
2219 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2220 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2221 ptr[i++] = 0;
2222
2223 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2224 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2225 ptr[i++] = 1;
2226 ptr[i++] = 1;
2227 ptr[i++] = 1;
2228
2229
2230 /* Dispatch */
2231 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2232 ptr[i++] = 1;
2233 ptr[i++] = 1;
2234 ptr[i++] = 1;
2235 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2236
2237
2238 while (i & 7)
2239 ptr[i++] = 0xffff1000; /* type3 nop packet */
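/* IBs are padded to an 8-dword boundary with type-3 NOPs
 * (0xffff1000) to satisfy the CP's fetch alignment.
 */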
2240
2241 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2242 ib_info.ib_mc_address = ib_result_mc_address;
2243 ib_info.size = i;
2244
2245 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2246 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2247 ibs_request.ring = 0;
2248 ibs_request.number_of_ibs = 1;
2249 ibs_request.ibs = &ib_info;
2250 ibs_request.resources = bo_list;
2251 ibs_request.fence_info.handle = NULL;
2252
2253 r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2254 CU_ASSERT_EQUAL(r, 0);
2255 seq_no = ibs_request.seq_no;
2256
2257
2258
2259 /* Prepare second command with dependency on the first */
2260 j = i;
2261 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2262 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2263 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2264 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2265 ptr[i++] = 99;
2266
2267 while (i & 7)
2268 ptr[i++] = 0xffff1000; /* type3 nop packet */
2269
2270 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2271 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2272 ib_info.size = i - j;
2273
2274 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2275 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2276 ibs_request.ring = 0;
2277 ibs_request.number_of_ibs = 1;
2278 ibs_request.ibs = &ib_info;
2279 ibs_request.resources = bo_list;
2280 ibs_request.fence_info.handle = NULL;
2281
2282 ibs_request.number_of_dependencies = 1;
2283
2284 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2285 ibs_request.dependencies[0].context = context_handle[1];
2286 ibs_request.dependencies[0].ip_instance = 0;
2287 ibs_request.dependencies[0].ring = 0;
2288 ibs_request.dependencies[0].fence = seq_no;
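/* The dependency names the first submission's fence (context[1],
 * GFX ring 0, seq_no), so the scheduler must not start this IB on
 * context[0] until the compute shader above has signalled.
 */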
2289
2290
2291 r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2292 CU_ASSERT_EQUAL(r, 0);
2293
2294
2295 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2296 fence_status.context = context_handle[0];
2297 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2298 fence_status.ip_instance = 0;
2299 fence_status.ring = 0;
2300 fence_status.fence = ibs_request.seq_no;
2301
2302 r = amdgpu_cs_query_fence_status(&fence_status,
2303 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2304 CU_ASSERT_EQUAL(r, 0);
2305
2306 /* If the dependency was honored, the WRITE_DATA ran after the shader, so 99 must be the final value */
2307 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2308
2309 r = amdgpu_bo_list_destroy(bo_list);
2310 CU_ASSERT_EQUAL(r, 0);
2311
2312 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2313 ib_result_mc_address, 4096);
2314 CU_ASSERT_EQUAL(r, 0);
2315
2316 r = amdgpu_cs_ctx_free(context_handle[0]);
2317 CU_ASSERT_EQUAL(r, 0);
2318 r = amdgpu_cs_ctx_free(context_handle[1]);
2319 CU_ASSERT_EQUAL(r, 0);
2320
2321 free(ibs_request.dependencies);
2322 }
2323
2324 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2325 {
2326 struct amdgpu_test_shader *shader;
2327 int i, loop = 0x10000;
2328
2329 switch (family) {
2330 case AMDGPU_FAMILY_AI:
2331 shader = &memcpy_cs_hang_slow_ai;
2332 break;
2333 case AMDGPU_FAMILY_RV:
2334 shader = &memcpy_cs_hang_slow_rv;
2335 break;
2336 default:
2337 return -1;
2339 }
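/* Build an artificially long kernel: keep the shader header, repeat
 * the loop body 0x10000 times, then append the footer, so the
 * dispatch runs long enough to trip the GPU's job timeout.
 */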
2340
2341 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2342
2343 for (i = 0; i < loop; i++)
2344 memcpy(ptr + shader->header_length + shader->body_length * i,
2345 shader->shader + shader->header_length,
2346 shader->body_length * sizeof(uint32_t));
2347
2348 memcpy(ptr + shader->header_length + shader->body_length * loop,
2349 shader->shader + shader->header_length + shader->body_length,
2350 shader->foot_length * sizeof(uint32_t));
2351
2352 return 0;
2353 }
2354
2355 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2356 int cs_type)
2357 {
2358 uint32_t shader_size;
2359 const uint32_t *shader;
2360
2361 switch (cs_type) {
2362 case CS_BUFFERCLEAR:
2363 shader = bufferclear_cs_shader_gfx9;
2364 shader_size = sizeof(bufferclear_cs_shader_gfx9);
2365 break;
2366 case CS_BUFFERCOPY:
2367 shader = buffercopy_cs_shader_gfx9;
2368 shader_size = sizeof(buffercopy_cs_shader_gfx9);
2369 break;
2370 case CS_HANG:
2371 shader = memcpy_ps_hang;
2372 shader_size = sizeof(memcpy_ps_hang);
2373 break;
2374 default:
2375 return -1;
2377 }
2378
2379 memcpy(ptr, shader, shader_size);
2380 return 0;
2381 }
2382
2383 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2384 {
2385 int i = 0;
2386
2387 /* Write context control and load shadowing register if necessary */
2388 if (ip_type == AMDGPU_HW_IP_GFX) {
2389 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2390 ptr[i++] = 0x80000000;
2391 ptr[i++] = 0x80000000;
2392 }
2393
2394 /* Issue commands to set default compute state. */
2395 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2396 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2397 ptr[i++] = 0x204;
2398 i += 3;
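/* PKT3_SET_SH_REG offsets are relative to the SH register base
 * (0x2c00), so 0x204 addresses mmCOMPUTE_START_X; the three payload
 * dwords stay zero from the earlier memset of the command buffer,
 * clearing COMPUTE_START_X/Y/Z.
 */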
2399
2400 /* clear mmCOMPUTE_TMPRING_SIZE */
2401 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2402 ptr[i++] = 0x218;
2403 ptr[i++] = 0;
2404
2405 return i;
2406 }
2407
2408 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2409 {
2410 int i = 0;
2411
2412 /* Issue commands to set cu mask used in current dispatch */
2413 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2414 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2415 ptr[i++] = 0x216;
2416 ptr[i++] = 0xffffffff;
2417 ptr[i++] = 0xffffffff;
2418 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2419 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2420 ptr[i++] = 0x219;
2421 ptr[i++] = 0xffffffff;
2422 ptr[i++] = 0xffffffff;
2423
2424 return i;
2425 }
2426
2427 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2428 {
2429 int i, j;
2430
2431 i = 0;
2432
2433 /* Writes shader state to HW */
2434 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2435 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2436 ptr[i++] = 0x20c;
2437 ptr[i++] = (shader_addr >> 8);
2438 ptr[i++] = (shader_addr >> 40);
2439 /* write sh regs*/
2440 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2441 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2442 /* register offset is relative to Gfx9ShRegBase (0x2c00) */
2443 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2444 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2445 }
2446
2447 return i;
2448 }
2449
2450 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2451 uint32_t ip_type,
2452 uint32_t ring)
2453 {
2454 amdgpu_context_handle context_handle;
2455 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2456 volatile unsigned char *ptr_dst;
2457 void *ptr_shader;
2458 uint32_t *ptr_cmd;
2459 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2460 amdgpu_va_handle va_dst, va_shader, va_cmd;
2461 int i, r;
2462 int bo_dst_size = 16384;
2463 int bo_shader_size = 4096;
2464 int bo_cmd_size = 4096;
2465 struct amdgpu_cs_request ibs_request = {0};
2466 struct amdgpu_cs_ib_info ib_info = {0};
2467 amdgpu_bo_list_handle bo_list;
2468 struct amdgpu_cs_fence fence_status = {0};
2469 uint32_t expired;
2470
2471 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2472 CU_ASSERT_EQUAL(r, 0);
2473
2474 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2475 AMDGPU_GEM_DOMAIN_GTT, 0,
2476 &bo_cmd, (void **)&ptr_cmd,
2477 &mc_address_cmd, &va_cmd);
2478 CU_ASSERT_EQUAL(r, 0);
2479 memset(ptr_cmd, 0, bo_cmd_size);
2480
2481 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2482 AMDGPU_GEM_DOMAIN_VRAM, 0,
2483 &bo_shader, &ptr_shader,
2484 &mc_address_shader, &va_shader);
2485 CU_ASSERT_EQUAL(r, 0);
2486 memset(ptr_shader, 0, bo_shader_size);
2487
2488 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2489 CU_ASSERT_EQUAL(r, 0);
2490
2491 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2492 AMDGPU_GEM_DOMAIN_VRAM, 0,
2493 &bo_dst, (void **)&ptr_dst,
2494 &mc_address_dst, &va_dst);
2495 CU_ASSERT_EQUAL(r, 0);
2496
2497 i = 0;
2498 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2499
2500 /* Issue commands to set cu mask used in current dispatch */
2501 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2502
2503 /* Writes shader state to HW */
2504 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2505
2506 /* Write constant data */
2507 /* Writes the UAV constant data to the SGPRs. */
2508 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2509 ptr_cmd[i++] = 0x240;
2510 ptr_cmd[i++] = mc_address_dst;
2511 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2512 ptr_cmd[i++] = 0x400;
2513 ptr_cmd[i++] = 0x74fac;
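/* The four dwords written to COMPUTE_USER_DATA_0 (0x240) look like a
 * gfx9 buffer resource descriptor for the destination UAV: base
 * address low, base high ORed with descriptor bits, 0x400 records,
 * and 0x74fac presumably encoding the type/format fields.
 */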
2514
2515 /* Sets a range of pixel shader constants */
2516 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2517 ptr_cmd[i++] = 0x244;
2518 ptr_cmd[i++] = 0x22222222;
2519 ptr_cmd[i++] = 0x22222222;
2520 ptr_cmd[i++] = 0x22222222;
2521 ptr_cmd[i++] = 0x22222222;
2522
2523 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2524 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2525 ptr_cmd[i++] = 0x215;
2526 ptr_cmd[i++] = 0;
2527
2528 /* dispatch direct command */
2529 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2530 ptr_cmd[i++] = 0x10;
2531 ptr_cmd[i++] = 1;
2532 ptr_cmd[i++] = 1;
2533 ptr_cmd[i++] = 1;
2534
2535 while (i & 7)
2536 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2537
2538 resources[0] = bo_dst;
2539 resources[1] = bo_shader;
2540 resources[2] = bo_cmd;
2541 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2542 CU_ASSERT_EQUAL(r, 0);
2543
2544 ib_info.ib_mc_address = mc_address_cmd;
2545 ib_info.size = i;
2546 ibs_request.ip_type = ip_type;
2547 ibs_request.ring = ring;
2548 ibs_request.resources = bo_list;
2549 ibs_request.number_of_ibs = 1;
2550 ibs_request.ibs = &ib_info;
2551 ibs_request.fence_info.handle = NULL;
2552
2553 /* submit CS */
2554 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2555 CU_ASSERT_EQUAL(r, 0);
2556
2557 r = amdgpu_bo_list_destroy(bo_list);
2558 CU_ASSERT_EQUAL(r, 0);
2559
2560 fence_status.ip_type = ip_type;
2561 fence_status.ip_instance = 0;
2562 fence_status.ring = ring;
2563 fence_status.context = context_handle;
2564 fence_status.fence = ibs_request.seq_no;
2565
2566 /* wait for the IB to complete */
2567 r = amdgpu_cs_query_fence_status(&fence_status,
2568 AMDGPU_TIMEOUT_INFINITE,
2569 0, &expired);
2570 CU_ASSERT_EQUAL(r, 0);
2571 CU_ASSERT_EQUAL(expired, true);
2572
2573 /* verify the memset test result matches the expected pattern */
2574 i = 0;
2575 while(i < bo_dst_size) {
2576 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2577 }
2578
2579 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2580 CU_ASSERT_EQUAL(r, 0);
2581
2582 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2583 CU_ASSERT_EQUAL(r, 0);
2584
2585 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2586 CU_ASSERT_EQUAL(r, 0);
2587
2588 r = amdgpu_cs_ctx_free(context_handle);
2589 CU_ASSERT_EQUAL(r, 0);
2590 }
2591
2592 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2593 uint32_t ip_type,
2594 uint32_t ring,
2595 int hang)
2596 {
2597 amdgpu_context_handle context_handle;
2598 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2599 volatile unsigned char *ptr_dst;
2600 void *ptr_shader;
2601 unsigned char *ptr_src;
2602 uint32_t *ptr_cmd;
2603 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2604 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2605 int i, r;
2606 int bo_dst_size = 16384;
2607 int bo_shader_size = 4096;
2608 int bo_cmd_size = 4096;
2609 struct amdgpu_cs_request ibs_request = {0};
2610 struct amdgpu_cs_ib_info ib_info = {0};
2611 uint32_t expired, hang_state, hangs;
2612 enum cs_type cs_type;
2613 amdgpu_bo_list_handle bo_list;
2614 struct amdgpu_cs_fence fence_status = {0};
2615
2616 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2617 CU_ASSERT_EQUAL(r, 0);
2618
2619 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2620 AMDGPU_GEM_DOMAIN_GTT, 0,
2621 &bo_cmd, (void **)&ptr_cmd,
2622 &mc_address_cmd, &va_cmd);
2623 CU_ASSERT_EQUAL(r, 0);
2624 memset(ptr_cmd, 0, bo_cmd_size);
2625
2626 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2627 AMDGPU_GEM_DOMAIN_VRAM, 0,
2628 &bo_shader, &ptr_shader,
2629 &mc_address_shader, &va_shader);
2630 CU_ASSERT_EQUAL(r, 0);
2631 memset(ptr_shader, 0, bo_shader_size);
2632
2633 cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2634 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2635 CU_ASSERT_EQUAL(r, 0);
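/* For the hang case a known-bad shader binary replaces the
 * buffer-copy kernel; the submission below is otherwise identical,
 * and the test then checks the context reset status instead of the
 * copied data.
 */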
2636
2637 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2638 AMDGPU_GEM_DOMAIN_VRAM, 0,
2639 &bo_src, (void **)&ptr_src,
2640 &mc_address_src, &va_src);
2641 CU_ASSERT_EQUAL(r, 0);
2642
2643 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2644 AMDGPU_GEM_DOMAIN_VRAM, 0,
2645 &bo_dst, (void **)&ptr_dst,
2646 &mc_address_dst, &va_dst);
2647 CU_ASSERT_EQUAL(r, 0);
2648
2649 memset(ptr_src, 0x55, bo_dst_size);
2650
2651 i = 0;
2652 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2653
2654 /* Issue commands to set cu mask used in current dispatch */
2655 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2656
2657 /* Writes shader state to HW */
2658 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2659
2660 /* Write constant data */
2661 /* Writes the texture resource constants data to the SGPRs */
2662 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2663 ptr_cmd[i++] = 0x240;
2664 ptr_cmd[i++] = mc_address_src;
2665 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2666 ptr_cmd[i++] = 0x400;
2667 ptr_cmd[i++] = 0x74fac;
2668
2669 /* Writes the UAV constant data to the SGPRs. */
2670 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2671 ptr_cmd[i++] = 0x244;
2672 ptr_cmd[i++] = mc_address_dst;
2673 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2674 ptr_cmd[i++] = 0x400;
2675 ptr_cmd[i++] = 0x74fac;
2676
2677 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2678 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2679 ptr_cmd[i++] = 0x215;
2680 ptr_cmd[i++] = 0;
2681
2682 /* dispatch direct command */
2683 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2684 ptr_cmd[i++] = 0x10;
2685 ptr_cmd[i++] = 1;
2686 ptr_cmd[i++] = 1;
2687 ptr_cmd[i++] = 1;
2688
2689 while (i & 7)
2690 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2691
2692 resources[0] = bo_shader;
2693 resources[1] = bo_src;
2694 resources[2] = bo_dst;
2695 resources[3] = bo_cmd;
2696 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2697 CU_ASSERT_EQUAL(r, 0);
2698
2699 ib_info.ib_mc_address = mc_address_cmd;
2700 ib_info.size = i;
2701 ibs_request.ip_type = ip_type;
2702 ibs_request.ring = ring;
2703 ibs_request.resources = bo_list;
2704 ibs_request.number_of_ibs = 1;
2705 ibs_request.ibs = &ib_info;
2706 ibs_request.fence_info.handle = NULL;
2707 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2708 CU_ASSERT_EQUAL(r, 0);
2709
2710 fence_status.ip_type = ip_type;
2711 fence_status.ip_instance = 0;
2712 fence_status.ring = ring;
2713 fence_status.context = context_handle;
2714 fence_status.fence = ibs_request.seq_no;
2715
2716 /* wait for the IB to complete */
2717 r = amdgpu_cs_query_fence_status(&fence_status,
2718 AMDGPU_TIMEOUT_INFINITE,
2719 0, &expired);
2720
2721 if (!hang) {
2722 CU_ASSERT_EQUAL(r, 0);
2723 CU_ASSERT_EQUAL(expired, true);
2724
2725 /* verify the memcpy test result matches the source data */
2726 i = 0;
2727 while(i < bo_dst_size) {
2728 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2729 i++;
2730 }
2731 } else {
2732 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2733 CU_ASSERT_EQUAL(r, 0);
2734 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2735 }
2736
2737 r = amdgpu_bo_list_destroy(bo_list);
2738 CU_ASSERT_EQUAL(r, 0);
2739
2740 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2741 CU_ASSERT_EQUAL(r, 0);
2742 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2743 CU_ASSERT_EQUAL(r, 0);
2744
2745 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2746 CU_ASSERT_EQUAL(r, 0);
2747
2748 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2749 CU_ASSERT_EQUAL(r, 0);
2750
2751 r = amdgpu_cs_ctx_free(context_handle);
2752 CU_ASSERT_EQUAL(r, 0);
2753 }
2754
2755 static void amdgpu_compute_dispatch_test(void)
2756 {
2757 int r;
2758 struct drm_amdgpu_info_hw_ip info;
2759 uint32_t ring_id;
2760
2761 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2762 CU_ASSERT_EQUAL(r, 0);
2763 if (!info.available_rings)
2764 printf("SKIP ... as there's no compute ring\n");
2765
2766 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2767 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2768 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2769 }
2770 }
2771
2772 static void amdgpu_gfx_dispatch_test(void)
2773 {
2774 int r;
2775 struct drm_amdgpu_info_hw_ip info;
2776 uint32_t ring_id;
2777
2778 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2779 CU_ASSERT_EQUAL(r, 0);
2780 if (!info.available_rings)
2781 printf("SKIP ... as there's no graphics ring\n");
2782
2783 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2784 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2785 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2786 }
2787 }
2788
2789 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2790 {
2791 int r;
2792 struct drm_amdgpu_info_hw_ip info;
2793 uint32_t ring_id;
2794
2795 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2796 CU_ASSERT_EQUAL(r, 0);
2797 if (!info.available_rings)
2798 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2799
2800 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
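/* Sandwich a hanging submission between two good ones: the ring
 * must execute the first copy, report a reset for the hang, and
 * still execute the final copy after recovery.
 */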
2801 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2802 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2803 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2804 }
2805 }
2806
2807 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2808 uint32_t ip_type, uint32_t ring)
2809 {
2810 amdgpu_context_handle context_handle;
2811 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2812 volatile unsigned char *ptr_dst;
2813 void *ptr_shader;
2814 unsigned char *ptr_src;
2815 uint32_t *ptr_cmd;
2816 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2817 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2818 int i, r;
2819 int bo_dst_size = 0x4000000;
2820 int bo_shader_size = 0x400000;
2821 int bo_cmd_size = 4096;
2822 struct amdgpu_cs_request ibs_request = {0};
2823 struct amdgpu_cs_ib_info ib_info = {0};
2824 uint32_t hang_state, hangs, expired;
2825 struct amdgpu_gpu_info gpu_info = {0};
2826 amdgpu_bo_list_handle bo_list;
2827 struct amdgpu_cs_fence fence_status = {0};
2828
2829 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2830 CU_ASSERT_EQUAL(r, 0);
2831
2832 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2833 CU_ASSERT_EQUAL(r, 0);
2834
2835 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2836 AMDGPU_GEM_DOMAIN_GTT, 0,
2837 &bo_cmd, (void **)&ptr_cmd,
2838 &mc_address_cmd, &va_cmd);
2839 CU_ASSERT_EQUAL(r, 0);
2840 memset(ptr_cmd, 0, bo_cmd_size);
2841
2842 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2843 AMDGPU_GEM_DOMAIN_VRAM, 0,
2844 &bo_shader, &ptr_shader,
2845 &mc_address_shader, &va_shader);
2846 CU_ASSERT_EQUAL(r, 0);
2847 memset(ptr_shader, 0, bo_shader_size);
2848
2849 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2850 CU_ASSERT_EQUAL(r, 0);
2851
2852 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2853 AMDGPU_GEM_DOMAIN_VRAM, 0,
2854 &bo_src, (void **)&ptr_src,
2855 &mc_address_src, &va_src);
2856 CU_ASSERT_EQUAL(r, 0);
2857
2858 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2859 AMDGPU_GEM_DOMAIN_VRAM, 0,
2860 &bo_dst, (void **)&ptr_dst,
2861 &mc_address_dst, &va_dst);
2862 CU_ASSERT_EQUAL(r, 0);
2863
2864 memset(ptr_src, 0x55, bo_dst_size);
2865
2866 i = 0;
2867 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2868
2869 /* Issue commands to set cu mask used in current dispatch */
2870 i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2871
2872 /* Writes shader state to HW */
2873 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2874
2875 /* Write constant data */
2876 /* Writes the texture resource constants data to the SGPRs */
2877 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2878 ptr_cmd[i++] = 0x240;
2879 ptr_cmd[i++] = mc_address_src;
2880 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2881 ptr_cmd[i++] = 0x400000;
2882 ptr_cmd[i++] = 0x74fac;
2883
2884 /* Writes the UAV constant data to the SGPRs. */
2885 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2886 ptr_cmd[i++] = 0x244;
2887 ptr_cmd[i++] = mc_address_dst;
2888 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2889 ptr_cmd[i++] = 0x400000;
2890 ptr_cmd[i++] = 0x74fac;
2891
2892 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2893 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2894 ptr_cmd[i++] = 0x215;
2895 ptr_cmd[i++] = 0;
2896
2897 /* dispatch direct command */
2898 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2899 ptr_cmd[i++] = 0x10000;
2900 ptr_cmd[i++] = 1;
2901 ptr_cmd[i++] = 1;
2902 ptr_cmd[i++] = 1;
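/* 0x10000 thread groups (versus 0x10 in the normal memcpy test),
 * combined with the oversized shader, makes this dispatch overrun
 * the job timeout rather than fault outright.
 */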
2903
2904 while (i & 7)
2905 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2906
2907 resources[0] = bo_shader;
2908 resources[1] = bo_src;
2909 resources[2] = bo_dst;
2910 resources[3] = bo_cmd;
2911 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2912 CU_ASSERT_EQUAL(r, 0);
2913
2914 ib_info.ib_mc_address = mc_address_cmd;
2915 ib_info.size = i;
2916 ibs_request.ip_type = ip_type;
2917 ibs_request.ring = ring;
2918 ibs_request.resources = bo_list;
2919 ibs_request.number_of_ibs = 1;
2920 ibs_request.ibs = &ib_info;
2921 ibs_request.fence_info.handle = NULL;
2922 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2923 CU_ASSERT_EQUAL(r, 0);
2924
2925 fence_status.ip_type = ip_type;
2926 fence_status.ip_instance = 0;
2927 fence_status.ring = ring;
2928 fence_status.context = context_handle;
2929 fence_status.fence = ibs_request.seq_no;
2930
2931 /* wait for the IB to complete */
2932 r = amdgpu_cs_query_fence_status(&fence_status,
2933 AMDGPU_TIMEOUT_INFINITE,
2934 0, &expired);
2935
2936 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2937 CU_ASSERT_EQUAL(r, 0);
2938 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2939
2940 r = amdgpu_bo_list_destroy(bo_list);
2941 CU_ASSERT_EQUAL(r, 0);
2942
2943 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2944 CU_ASSERT_EQUAL(r, 0);
2945 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2946 CU_ASSERT_EQUAL(r, 0);
2947
2948 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2949 CU_ASSERT_EQUAL(r, 0);
2950
2951 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2952 CU_ASSERT_EQUAL(r, 0);
2953
2954 r = amdgpu_cs_ctx_free(context_handle);
2955 CU_ASSERT_EQUAL(r, 0);
2956 }
2957
2958 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2959 {
2960 int r;
2961 struct drm_amdgpu_info_hw_ip info;
2962 uint32_t ring_id;
2963
2964 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2965 CU_ASSERT_EQUAL(r, 0);
2966 if (!info.available_rings)
2967 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2968
2969 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2970 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2971 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2972 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2973 }
2974 }
2975
2976 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2977 {
2978 struct amdgpu_test_shader *shader;
2979 int i, loop = 0x40000;
2980
2981 switch (family) {
2982 case AMDGPU_FAMILY_AI:
2983 case AMDGPU_FAMILY_RV:
2984 shader = &memcpy_ps_hang_slow_ai;
2985 break;
2986 default:
2987 return -1;
2989 }
2990
2991 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2992
2993 for (i = 0; i < loop; i++)
2994 memcpy(ptr + shader->header_length + shader->body_length * i,
2995 shader->shader + shader->header_length,
2996 shader->body_length * sizeof(uint32_t));
2997
2998 memcpy(ptr + shader->header_length + shader->body_length * loop,
2999 shader->shader + shader->header_length + shader->body_length,
3000 shader->foot_length * sizeof(uint32_t));
3001
3002 return 0;
3003 }
3004
3005 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
3006 {
3007 int i;
3008 uint32_t shader_offset = 256;
3009 uint32_t mem_offset, patch_code_offset;
3010 uint32_t shader_size, patchinfo_code_size;
3011 const uint32_t *shader;
3012 const uint32_t *patchinfo_code;
3013 const uint32_t *patchcode_offset;
3014
3015 switch (ps_type) {
3016 case PS_CONST:
3017 shader = ps_const_shader_gfx9;
3018 shader_size = sizeof(ps_const_shader_gfx9);
3019 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3020 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3021 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3022 break;
3023 case PS_TEX:
3024 shader = ps_tex_shader_gfx9;
3025 shader_size = sizeof(ps_tex_shader_gfx9);
3026 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3027 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3028 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3029 break;
3030 case PS_HANG:
3031 shader = memcpy_ps_hang;
3032 shader_size = sizeof(memcpy_ps_hang);
3033
3034 memcpy(ptr, shader, shader_size);
3035 return 0;
3036 default:
3037 return -1;
3039 }
3040
3041 /* write main shader program */
3042 for (i = 0; i < 10; i++) {
3043 mem_offset = i * shader_offset;
3044 memcpy(ptr + mem_offset, shader, shader_size);
3045 }
3046
3047 /* overwrite patch codes */
3048 for (i = 0; i < 10; i++) {
3049 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3050 patch_code_offset = i * patchinfo_code_size;
3051 memcpy(ptr + mem_offset,
3052 patchinfo_code + patch_code_offset,
3053 patchinfo_code_size * sizeof(uint32_t));
3054 }
3055
3056 return 0;
3057 }
3058
3059 /* load RectPosTexFast_VS */
3060 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3061 {
3062 const uint32_t *shader;
3063 uint32_t shader_size;
3064
3065 shader = vs_RectPosTexFast_shader_gfx9;
3066 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3067
3068 memcpy(ptr, shader, shader_size);
3069
3070 return 0;
3071 }
3072
3073 static int amdgpu_draw_init(uint32_t *ptr)
3074 {
3075 int i = 0;
3076 const uint32_t *preamblecache_ptr;
3077 uint32_t preamblecache_size;
3078
3079 /* Write context control and load shadowing register if necessary */
3080 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3081 ptr[i++] = 0x80000000;
3082 ptr[i++] = 0x80000000;
3083
3084 preamblecache_ptr = preamblecache_gfx9;
3085 preamblecache_size = sizeof(preamblecache_gfx9);
3086
3087 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3088 return i + preamblecache_size/sizeof(uint32_t);
3089 }
3090
3091 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3092 uint64_t dst_addr,
3093 int hang_slow)
3094 {
3095 int i = 0;
3096
3097 /* setup color buffer */
3098 /* offset reg
3099 0xA318 CB_COLOR0_BASE
3100 0xA319 CB_COLOR0_BASE_EXT
3101 0xA31A CB_COLOR0_ATTRIB2
3102 0xA31B CB_COLOR0_VIEW
3103 0xA31C CB_COLOR0_INFO
3104 0xA31D CB_COLOR0_ATTRIB
3105 0xA31E CB_COLOR0_DCC_CONTROL
3106 0xA31F CB_COLOR0_CMASK
3107 0xA320 CB_COLOR0_CMASK_BASE_EXT
3108 0xA321 CB_COLOR0_FMASK
3109 0xA322 CB_COLOR0_FMASK_BASE_EXT
3110 0xA323 CB_COLOR0_CLEAR_WORD0
3111 0xA324 CB_COLOR0_CLEAR_WORD1
3112 0xA325 CB_COLOR0_DCC_BASE
3113 0xA326 CB_COLOR0_DCC_BASE_EXT */
3114 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3115 ptr[i++] = 0x318;
3116 ptr[i++] = dst_addr >> 8;
3117 ptr[i++] = dst_addr >> 40;
3118 ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3119 ptr[i++] = 0;
3120 ptr[i++] = 0x50438;
3121 ptr[i++] = 0x10140000;
3122 i += 9;
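/* The nine skipped dwords (CB_COLOR0_DCC_CONTROL through
 * CB_COLOR0_DCC_BASE_EXT) stay zero from the memset; CB_COLOR0_ATTRIB2
 * above carries the surface dimensions, which the hang_slow variant
 * enlarges to stretch the draw time.
 */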
3123
3124 /* mmCB_MRT0_EPITCH */
3125 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3126 ptr[i++] = 0x1e8;
3127 ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3128
3129 /* 0xA32B CB_COLOR1_INFO */
3130 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3131 ptr[i++] = 0x32b;
3132 ptr[i++] = 0;
3133
3134 /* 0xA33A CB_COLOR2_INFO */
3135 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3136 ptr[i++] = 0x33a;
3137 ptr[i++] = 0;
3138
3139 /* SPI_SHADER_COL_FORMAT */
3140 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3141 ptr[i++] = 0x1c5;
3142 ptr[i++] = 9;
3143
3144 /* Setup depth buffer */
3145 /* mmDB_Z_INFO */
3146 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3147 ptr[i++] = 0xe;
3148 i += 2;
3149
3150 return i;
3151 }
3152
3153 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3154 {
3155 int i = 0;
3156 const uint32_t *cached_cmd_ptr;
3157 uint32_t cached_cmd_size;
3158
3159 /* mmPA_SC_TILE_STEERING_OVERRIDE */
3160 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3161 ptr[i++] = 0xd7;
3162 ptr[i++] = 0;
3163
3164 ptr[i++] = 0xffff1000;
3165 ptr[i++] = 0xc0021000;
3166
3167 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3168 ptr[i++] = 0xd7;
3169 ptr[i++] = 1;
3170
3171 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3172 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3173 ptr[i++] = 0x2fe;
3174 i += 16;
3175
3176 /* mmPA_SC_CENTROID_PRIORITY_0 */
3177 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3178 ptr[i++] = 0x2f5;
3179 i += 2;
3180
3181 cached_cmd_ptr = cached_cmd_gfx9;
3182 cached_cmd_size = sizeof(cached_cmd_gfx9);
3183
3184 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3185 if (hang_slow)
3186 *(ptr + i + 12) = 0x8000800;
3187 i += cached_cmd_size/sizeof(uint32_t);
3188
3189 return i;
3190 }
3191
3192 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3193 int ps_type,
3194 uint64_t shader_addr,
3195 int hang_slow)
3196 {
3197 int i = 0;
3198
3199 /* mmPA_CL_VS_OUT_CNTL */
3200 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3201 ptr[i++] = 0x207;
3202 ptr[i++] = 0;
3203
3204 /* mmSPI_SHADER_PGM_RSRC3_VS */
3205 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3206 ptr[i++] = 0x46;
3207 ptr[i++] = 0xffff;
3208
3209 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3210 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3211 ptr[i++] = 0x48;
3212 ptr[i++] = shader_addr >> 8;
3213 ptr[i++] = shader_addr >> 40;
3214
3215 /* mmSPI_SHADER_PGM_RSRC1_VS */
3216 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3217 ptr[i++] = 0x4a;
3218 ptr[i++] = 0xc0081;
3219 /* mmSPI_SHADER_PGM_RSRC2_VS */
3220 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3221 ptr[i++] = 0x4b;
3222 ptr[i++] = 0x18;
3223
3224 /* mmSPI_VS_OUT_CONFIG */
3225 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3226 ptr[i++] = 0x1b1;
3227 ptr[i++] = 2;
3228
3229 /* mmSPI_SHADER_POS_FORMAT */
3230 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3231 ptr[i++] = 0x1c3;
3232 ptr[i++] = 4;
3233
3234 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3235 ptr[i++] = 0x4c;
3236 i += 2;
3237 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3238 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3239
3240 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3241 ptr[i++] = 0x50;
3242 i += 2;
3243 if (ps_type == PS_CONST) {
3244 i += 2;
3245 } else if (ps_type == PS_TEX) {
3246 ptr[i++] = 0x3f800000;
3247 ptr[i++] = 0x3f800000;
3248 }
3249
3250 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3251 ptr[i++] = 0x54;
3252 i += 4;
3253
3254 return i;
3255 }
3256
3257 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3258 int ps_type,
3259 uint64_t shader_addr)
3260 {
3261 int i, j;
3262 const uint32_t *sh_registers;
3263 const uint32_t *context_registers;
3264 uint32_t num_sh_reg, num_context_reg;
3265
3266 if (ps_type == PS_CONST) {
3267 sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3268 context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3269 num_sh_reg = ps_num_sh_registers_gfx9;
3270 num_context_reg = ps_num_context_registers_gfx9;
3271 } else if (ps_type == PS_TEX) {
3272 sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3273 context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3274 num_sh_reg = ps_num_sh_registers_gfx9;
3275 num_context_reg = ps_num_context_registers_gfx9;
3276 }
3277
3278 i = 0;
3279
3280 /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS
3281 0x2c08 SPI_SHADER_PGM_LO_PS
3282 0x2c09 SPI_SHADER_PGM_HI_PS */
3283 shader_addr += 256 * 9;
3284 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3285 ptr[i++] = 0x7;
3286 ptr[i++] = 0xffff;
3287 ptr[i++] = shader_addr >> 8;
3288 ptr[i++] = shader_addr >> 40;
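/* amdgpu_draw_load_ps_shader() stored ten patched copies of the PS
 * binary at a 256-byte stride; shader_addr += 256 * 9 points
 * SPI_SHADER_PGM_LO/HI_PS (again VA >> 8 / VA >> 40) at the last
 * copy.
 */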
3289
3290 for (j = 0; j < num_sh_reg; j++) {
3291 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3292 ptr[i++] = sh_registers[j * 2] - 0x2c00;
3293 ptr[i++] = sh_registers[j * 2 + 1];
3294 }
3295
3296 for (j = 0; j < num_context_reg; j++) {
3297 if (context_registers[j * 2] != 0xA1C5) {
3298 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3299 ptr[i++] = context_registers[j * 2] - 0xa000;
3300 ptr[i++] = context_registers[j * 2 + 1];
3301 }
3302
3303 if (context_registers[j * 2] == 0xA1B4) {
3304 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3305 ptr[i++] = 0x1b3;
3306 ptr[i++] = 2;
3307 }
3308 }
3309
3310 return i;
3311 }
3312
3313 static int amdgpu_draw_draw(uint32_t *ptr)
3314 {
3315 int i = 0;
3316
3317 /* mmIA_MULTI_VGT_PARAM */
3318 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3319 ptr[i++] = 0x40000258;
3320 ptr[i++] = 0xd00ff;
3321
3322 /* mmVGT_PRIMITIVE_TYPE */
3323 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3324 ptr[i++] = 0x10000242;
3325 ptr[i++] = 0x11;
3326
3327 ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3328 ptr[i++] = 3;
3329 ptr[i++] = 2;
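/* VGT_PRIMITIVE_TYPE 0x11 selects RECTLIST, so the three vertices
 * emitted by DRAW_INDEX_AUTO form a rectangle that should cover the
 * whole render target; the memset/memcpy draw tests read the result
 * back byte by byte.
 */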
3330
3331 return i;
3332 }
3333
3334 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3335 amdgpu_bo_handle bo_shader_ps,
3336 amdgpu_bo_handle bo_shader_vs,
3337 uint64_t mc_address_shader_ps,
3338 uint64_t mc_address_shader_vs,
3339 uint32_t ring_id)
3340 {
3341 amdgpu_context_handle context_handle;
3342 amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3343 volatile unsigned char *ptr_dst;
3344 uint32_t *ptr_cmd;
3345 uint64_t mc_address_dst, mc_address_cmd;
3346 amdgpu_va_handle va_dst, va_cmd;
3347 int i, r;
3348 int bo_dst_size = 16384;
3349 int bo_cmd_size = 4096;
3350 struct amdgpu_cs_request ibs_request = {0};
3351 struct amdgpu_cs_ib_info ib_info = {0};
3352 struct amdgpu_cs_fence fence_status = {0};
3353 uint32_t expired;
3354 amdgpu_bo_list_handle bo_list;
3355
3356 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3357 CU_ASSERT_EQUAL(r, 0);
3358
3359 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3360 AMDGPU_GEM_DOMAIN_GTT, 0,
3361 &bo_cmd, (void **)&ptr_cmd,
3362 &mc_address_cmd, &va_cmd);
3363 CU_ASSERT_EQUAL(r, 0);
3364 memset(ptr_cmd, 0, bo_cmd_size);
3365
3366 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3367 AMDGPU_GEM_DOMAIN_VRAM, 0,
3368 &bo_dst, (void **)&ptr_dst,
3369 &mc_address_dst, &va_dst);
3370 CU_ASSERT_EQUAL(r, 0);
3371
3372 i = 0;
3373 i += amdgpu_draw_init(ptr_cmd + i);
3374
3375 i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3376
3377 i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3378
3379 i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3380
3381 i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3382
3383 ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3384 ptr_cmd[i++] = 0xc;
3385 ptr_cmd[i++] = 0x33333333;
3386 ptr_cmd[i++] = 0x33333333;
3387 ptr_cmd[i++] = 0x33333333;
3388 ptr_cmd[i++] = 0x33333333;
3389
3390 i += amdgpu_draw_draw(ptr_cmd + i);
3391
3392 while (i & 7)
3393 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3394
3395 resources[0] = bo_dst;
3396 resources[1] = bo_shader_ps;
3397 resources[2] = bo_shader_vs;
3398 resources[3] = bo_cmd;
3399 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3400 CU_ASSERT_EQUAL(r, 0);
3401
3402 ib_info.ib_mc_address = mc_address_cmd;
3403 ib_info.size = i;
3404 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3405 ibs_request.ring = ring_id;
3406 ibs_request.resources = bo_list;
3407 ibs_request.number_of_ibs = 1;
3408 ibs_request.ibs = &ib_info;
3409 ibs_request.fence_info.handle = NULL;
3410
3411 /* submit CS */
3412 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3413 CU_ASSERT_EQUAL(r, 0);
3414
3415 r = amdgpu_bo_list_destroy(bo_list);
3416 CU_ASSERT_EQUAL(r, 0);
3417
3418 fence_status.ip_type = AMDGPU_HW_IP_GFX;
3419 fence_status.ip_instance = 0;
3420 fence_status.ring = ring_id;
3421 fence_status.context = context_handle;
3422 fence_status.fence = ibs_request.seq_no;
3423
3424 /* wait for the IB to complete */
3425 r = amdgpu_cs_query_fence_status(&fence_status,
3426 AMDGPU_TIMEOUT_INFINITE,
3427 0, &expired);
3428 CU_ASSERT_EQUAL(r, 0);
3429 CU_ASSERT_EQUAL(expired, true);
3430
3431 /* verify the memset test result matches the expected pattern */
3432 i = 0;
3433 while(i < bo_dst_size) {
3434 CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3435 }
3436
3437 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3438 CU_ASSERT_EQUAL(r, 0);
3439
3440 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3441 CU_ASSERT_EQUAL(r, 0);
3442
3443 r = amdgpu_cs_ctx_free(context_handle);
3444 CU_ASSERT_EQUAL(r, 0);
3445 }
3446
3447 static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3448 uint32_t ring)
3449 {
3450 amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3451 void *ptr_shader_ps;
3452 void *ptr_shader_vs;
3453 uint64_t mc_address_shader_ps, mc_address_shader_vs;
3454 amdgpu_va_handle va_shader_ps, va_shader_vs;
3455 int r;
3456 int bo_shader_size = 4096;
3457
3458 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3459 AMDGPU_GEM_DOMAIN_VRAM, 0,
3460 &bo_shader_ps, &ptr_shader_ps,
3461 &mc_address_shader_ps, &va_shader_ps);
3462 CU_ASSERT_EQUAL(r, 0);
3463 memset(ptr_shader_ps, 0, bo_shader_size);
3464
3465 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3466 AMDGPU_GEM_DOMAIN_VRAM, 0,
3467 &bo_shader_vs, &ptr_shader_vs,
3468 &mc_address_shader_vs, &va_shader_vs);
3469 CU_ASSERT_EQUAL(r, 0);
3470 memset(ptr_shader_vs, 0, bo_shader_size);
3471
3472 r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
3473 CU_ASSERT_EQUAL(r, 0);
3474
3475 r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
3476 CU_ASSERT_EQUAL(r, 0);
3477
3478 amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3479 mc_address_shader_ps, mc_address_shader_vs, ring);
3480
3481 r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3482 CU_ASSERT_EQUAL(r, 0);
3483
3484 r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3485 CU_ASSERT_EQUAL(r, 0);
3486 }
3487
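/*
 * Copy bo_src to bo_dst with a textured draw: the PS_TEX pixel shader samples
 * the source buffer and writes the result to the color buffer. With hang != 0
 * the submission is expected to hang the GPU instead, and the context must
 * report a reset.
 */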
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

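	/* fill the source with a recognizable pattern so the copy can be verified */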
	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

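	/*
	 * Write the texture resource descriptor for the source buffer into
	 * the PS user data at SH register offset 0xc (presumably the
	 * SPI_SHADER_USER_DATA_PS_* SGPRs). Only five of the eight dwords
	 * carry data; the rest stay zero from the earlier memset of the
	 * command buffer, so "i += 3" just skips over them.
	 */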
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

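	/*
	 * Sampler descriptor at SH register offset 0x14; the trailing dwords
	 * again remain zero.
	 */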
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

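	/* clear context register 0x191 (presumably SPI_PS_INPUT_CNTL_0) */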
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify that the copied data matches the source */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

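/*
 * Load the vertex shader and either the PS_TEX or, when hang is set, the
 * PS_HANG pixel shader, then run the draw-based memcpy.
 */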
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

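/* Top-level draw test: exercise the memset and memcpy paths on every available GFX ring */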
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

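	/* available_rings is a bitmask with one bit per usable ring */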
	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

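/*
 * Slow-hang variant of the draw-based memcpy: a large pixel shader (loaded by
 * amdgpu_draw_load_ps_shader_hang_slow) and 64 MiB buffers keep the GPU busy
 * long enough to trigger a timeout-based reset, which the context must then
 * report as AMDGPU_CTX_UNKNOWN_RESET.
 */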
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
						    mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

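	/*
	 * Texture descriptor for the source buffer, as in amdgpu_memcpy_draw;
	 * the differing fields (0x1ffc7ff, 0xffe000) presumably encode the
	 * larger dimensions of the 64 MiB surface.
	 */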
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

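	/*
	 * The hanging submission should have forced a GPU reset by now (the
	 * infinite wait returns once the kernel recovers), so the context is
	 * expected to report an unknown reset; r is deliberately not asserted
	 * above.
	 */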
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

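/*
 * Trigger a GPU reset through the amdgpu_gpu_recover debugfs file and check
 * that an existing context observes it, then re-run the dispatch tests to
 * confirm the GPU recovered.
 */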
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

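	/* locate this device's debugfs directory via its DRM minor number;
	 * reading amdgpu_gpu_recover forces a GPU reset */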
	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

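	/* the GPU should be usable again after recovery; re-run the dispatch
	 * tests to confirm */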
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}
