1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #if HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37 
38 #include "CUnit/Basic.h"
39 
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
44 
45 static  amdgpu_device_handle device_handle;
46 static  uint32_t  major_version;
47 static  uint32_t  minor_version;
48 static  uint32_t  family_id;
49 static  uint32_t  chip_id;
50 static  uint32_t  chip_rev;
51 
52 static void amdgpu_query_info_test(void);
53 static void amdgpu_command_submission_gfx(void);
54 static void amdgpu_command_submission_compute(void);
55 static void amdgpu_command_submission_multi_fence(void);
56 static void amdgpu_command_submission_sdma(void);
57 static void amdgpu_userptr_test(void);
58 static void amdgpu_semaphore_test(void);
59 static void amdgpu_sync_dependency_test(void);
60 static void amdgpu_bo_eviction_test(void);
61 static void amdgpu_compute_dispatch_test(void);
62 static void amdgpu_gfx_dispatch_test(void);
63 static void amdgpu_draw_test(void);
64 static void amdgpu_gpu_reset_test(void);
65 
66 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
67 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
68 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
69 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
70 				       unsigned ip_type,
71 				       int instance, int pm4_dw, uint32_t *pm4_src,
72 				       int res_cnt, amdgpu_bo_handle *resources,
73 				       struct amdgpu_cs_ib_info *ib_info,
74 				       struct amdgpu_cs_request *ibs_request);
75 
76 CU_TestInfo basic_tests[] = {
77 	{ "Query Info Test",  amdgpu_query_info_test },
78 	{ "Userptr Test",  amdgpu_userptr_test },
79 	{ "bo eviction Test",  amdgpu_bo_eviction_test },
80 	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
81 	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
82 	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
83 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
84 	{ "SW semaphore Test",  amdgpu_semaphore_test },
85 	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
86 	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
87 	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
88 	{ "Draw Test",  amdgpu_draw_test },
89 	{ "GPU reset Test", amdgpu_gpu_reset_test },
90 	CU_TEST_INFO_NULL,
91 };
92 #define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
93 #define SDMA_PKT_HEADER_op_offset 0
94 #define SDMA_PKT_HEADER_op_mask   0x000000FF
95 #define SDMA_PKT_HEADER_op_shift  0
96 #define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
97 #define SDMA_OPCODE_CONSTANT_FILL  11
98 #       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
99 	/* 0 = byte fill
100 	 * 2 = DW fill
101 	 */
102 #define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
103 					(((sub_op) & 0xFF) << 8) |	\
104 					(((op) & 0xFF) << 0))
105 #define	SDMA_OPCODE_WRITE				  2
106 #       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
107 #       define SDMA_WRTIE_SUB_OPCODE_TILED                1
108 
109 #define	SDMA_OPCODE_COPY				  1
110 #       define SDMA_COPY_SUB_OPCODE_LINEAR                0
111 
112 #define	SDMA_OPCODE_ATOMIC				  10
113 #		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
114         /* 0 - single_pass_atomic.
115          * 1 - loop_until_compare_satisfied.
116          */
117 #		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
118 		/* 0 - non-TMZ.
119 		 * 1 - TMZ.
120 	     */
121 #		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
122 		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
123 		 * same as Packet 3
124 		 */
125 
126 #define GFX_COMPUTE_NOP  0xffff1000
127 #define SDMA_NOP  0x0
128 
/*
 * PM4 packet helpers.
 *
 * A PM4 header, as encoded/decoded by the macros below, carries:
 *   bits 31:30  packet type
 *   bits 29:16  count field
 *   bits 15:8   opcode (type-3 packets)
 *   bits 15:0   register offset (type-0 packets)
 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

/* Field extractors for an already-built header word. */
#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)

/* Type-0 header: register offset 'reg', count field 'n'. */
#define PACKET0(reg, n)	(((unsigned)PACKET_TYPE0 << 30) |		\
			 ((reg) & 0xFFFF) |				\
			 (((n) & 0x3FFF) << 16))
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

/* NOTE(review): REG_SET is not defined in the visible part of this file. */
#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

/*
 * Type-3 header: opcode 'op', count field 'n'.
 * The packet type is shifted as unsigned: 3 << 30 does not fit in a signed
 * int, so the signed shift would overflow.
 */
#define PACKET3(op, n)	(((unsigned)PACKET_TYPE3 << 30) |		\
			 (((op) & 0xFF) << 8) |				\
			 (((n) & 0x3FFF) << 16))
/*
 * Type-3 header with bit 1 set. The whole expansion is parenthesized so the
 * macro can be combined safely with other operators (e.g. '&', '==').
 */
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
152 
153 /* Packet 3 types */
154 #define	PACKET3_NOP					0x10
155 
156 #define	PACKET3_WRITE_DATA				0x37
157 #define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
158 		/* 0 - register
159 		 * 1 - memory (sync - via GRBM)
160 		 * 2 - gl2
161 		 * 3 - gds
162 		 * 4 - reserved
163 		 * 5 - memory (async - direct)
164 		 */
165 #define		WR_ONE_ADDR                             (1 << 16)
166 #define		WR_CONFIRM                              (1 << 20)
167 #define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
168 		/* 0 - LRU
169 		 * 1 - Stream
170 		 */
171 #define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
172 		/* 0 - me
173 		 * 1 - pfp
174 		 * 2 - ce
175 		 */
176 
177 #define	PACKET3_ATOMIC_MEM				0x1E
178 #define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
179 #define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
180             /* 0 - single_pass_atomic.
181              * 1 - loop_until_compare_satisfied.
182              */
183 #define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
184             /* 0 - lru.
185              * 1 - stream.
186              */
187 #define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
188             /* 0 - micro_engine.
189 			 */
190 
191 #define	PACKET3_DMA_DATA				0x50
192 /* 1. header
193  * 2. CONTROL
194  * 3. SRC_ADDR_LO or DATA [31:0]
195  * 4. SRC_ADDR_HI [31:0]
196  * 5. DST_ADDR_LO [31:0]
197  * 6. DST_ADDR_HI [7:0]
198  * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
199  */
200 /* CONTROL */
201 #              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
202 		/* 0 - ME
203 		 * 1 - PFP
204 		 */
205 #              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
206 		/* 0 - LRU
207 		 * 1 - Stream
208 		 * 2 - Bypass
209 		 */
210 #              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
211 #              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
212 		/* 0 - DST_ADDR using DAS
213 		 * 1 - GDS
214 		 * 3 - DST_ADDR using L2
215 		 */
216 #              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
217 		/* 0 - LRU
218 		 * 1 - Stream
219 		 * 2 - Bypass
220 		 */
221 #              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
222 #              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
223 		/* 0 - SRC_ADDR using SAS
224 		 * 1 - GDS
225 		 * 2 - DATA
226 		 * 3 - SRC_ADDR using L2
227 		 */
/* Bit 31 of the CONTROL dword; unsigned literal because 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_CP_SYNC     (1u << 31)
229 /* COMMAND */
230 #              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
231 #              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
232 		/* 0 - none
233 		 * 1 - 8 in 16
234 		 * 2 - 8 in 32
235 		 * 3 - 8 in 64
236 		 */
237 #              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
238 		/* 0 - none
239 		 * 1 - 8 in 16
240 		 * 2 - 8 in 32
241 		 * 3 - 8 in 64
242 		 */
243 #              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
244 		/* 0 - memory
245 		 * 1 - register
246 		 */
247 #              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
248 		/* 0 - memory
249 		 * 1 - register
250 		 */
251 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
252 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
253 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
254 
255 #define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
256 						(((b) & 0x1) << 26) |		\
257 						(((t) & 0x1) << 23) |		\
258 						(((s) & 0x1) << 22) |		\
259 						(((cnt) & 0xFFFFF) << 0))
260 #define	SDMA_OPCODE_COPY_SI	3
261 #define SDMA_OPCODE_CONSTANT_FILL_SI	13
262 #define SDMA_NOP_SI  0xf
263 #define GFX_COMPUTE_NOP_SI 0x80000000
264 #define	PACKET3_DMA_DATA_SI	0x41
265 #              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
266 		/* 0 - ME
267 		 * 1 - PFP
268 		 */
269 #              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
270 		/* 0 - DST_ADDR using DAS
271 		 * 1 - GDS
272 		 * 3 - DST_ADDR using L2
273 		 */
274 #              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
275 		/* 0 - SRC_ADDR using SAS
276 		 * 1 - GDS
277 		 * 2 - DATA
278 		 * 3 - SRC_ADDR using L2
279 		 */
/* Bit 31; unsigned literal because 1 << 31 overflows a signed int. */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1u << 31)
281 
282 
283 #define PKT3_CONTEXT_CONTROL                   0x28
284 #define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
285 #define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
286 #define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
287 
288 #define PKT3_CLEAR_STATE                       0x12
289 
290 #define PKT3_SET_SH_REG                        0x76
291 #define		PACKET3_SET_SH_REG_START			0x00002c00
292 
293 #define	PACKET3_DISPATCH_DIRECT				0x15
294 #define PACKET3_EVENT_WRITE				0x46
295 #define PACKET3_ACQUIRE_MEM				0x58
296 #define PACKET3_SET_CONTEXT_REG				0x69
297 #define PACKET3_SET_UCONFIG_REG				0x79
298 #define PACKET3_DRAW_INDEX_AUTO				0x2D
299 /* gfx 8 */
300 #define mmCOMPUTE_PGM_LO                                                        0x2e0c
301 #define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
302 #define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
303 #define mmCOMPUTE_USER_DATA_0                                                   0x2e40
304 #define mmCOMPUTE_USER_DATA_1                                                   0x2e41
305 #define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
306 #define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
307 
308 
309 
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The argument is parenthesized and converted to uint32_t before masking, so
 * that (a) compound expressions such as SWAP_32(a | b) are swapped as a whole
 * and (b) moving a low byte >= 0x80 into bits 31:24 does not overflow a
 * signed int. The argument is evaluated more than once; do not pass
 * expressions with side effects.
 */
#define SWAP_32(num) ((((uint32_t)(num) & 0xff000000u) >> 24) | \
		      (((uint32_t)(num) & 0x0000ff00u) << 8) | \
		      (((uint32_t)(num) & 0x00ff0000u) >> 8) | \
		      (((uint32_t)(num) & 0x000000ffu) << 24))
314 
315 
316 /* Shader code
317  * void main()
318 {
319 
320 	float x = some_input;
321 		for (unsigned i = 0; i < 1000000; i++)
322   	x = sin(x);
323 
324 	u[0] = 42u;
325 }
326 */
327 
328 static  uint32_t shader_bin[] = {
329 	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
330 	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
331 	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
332 	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
333 };
334 
335 #define CODE_OFFSET 512
336 #define DATA_OFFSET 1024
337 
338 enum cs_type {
339 	CS_BUFFERCLEAR,
340 	CS_BUFFERCOPY,
341 	CS_HANG,
342 	CS_HANG_SLOW
343 };
344 
345 static const uint32_t bufferclear_cs_shader_gfx9[] = {
346     0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
347     0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
348     0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
349     0xbf810000
350 };
351 
352 static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
353 	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
354 	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
355 	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
356 	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
357 	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
358 };
359 
360 static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
361 
362 static const uint32_t buffercopy_cs_shader_gfx9[] = {
363     0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
364     0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
365     0xe01c2000, 0x80010200, 0xbf810000
366 };
367 
368 static const uint32_t preamblecache_gfx9[] = {
369 	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
370 	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
371 	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
372 	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
373 	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
374 	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
375 	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
376 	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
377 	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
378 	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
379 	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
380 	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
381 	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
382 	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
383 	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
384 	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
385 	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
386 	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
387 	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
388 	0xc0017900, 0x24b, 0x0
389 };
390 
391 enum ps_type {
392 	PS_CONST,
393 	PS_TEX,
394 	PS_HANG,
395 	PS_HANG_SLOW
396 };
397 
398 static const uint32_t ps_const_shader_gfx9[] = {
399     0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
400     0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
401     0xC4001C0F, 0x00000100, 0xBF810000
402 };
403 
404 static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
405 
406 static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
407     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
408      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
409      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
410      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
411      { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
412      { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
413      { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
414      { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
415      { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
416      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
417     }
418 };
419 
420 static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
421     0x00000004
422 };
423 
424 static const uint32_t ps_num_sh_registers_gfx9 = 2;
425 
426 static const uint32_t ps_const_sh_registers_gfx9[][2] = {
427     {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
428     {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
429 };
430 
431 static const uint32_t ps_num_context_registers_gfx9 = 7;
432 
433 static const uint32_t ps_const_context_reg_gfx9[][2] = {
434     {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
435     {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
436     {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
437     {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
438     {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
439     {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
440     {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
441 };
442 
443 static const uint32_t ps_tex_shader_gfx9[] = {
444     0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
445     0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
446     0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
447     0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
448     0x00000100, 0xBF810000
449 };
450 
451 static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
452     0x0000000B
453 };
454 
455 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
456 
457 static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
458     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
459      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
460      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
461      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
462      { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
463      { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
464      { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
465      { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
466      { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
467      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
468     }
469 };
470 
471 static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
472     {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
473     {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
474 };
475 
476 static const uint32_t ps_tex_context_reg_gfx9[][2] = {
477     {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
478     {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
479     {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
480     {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
481     {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
482     {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
483     {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
484 };
485 
486 static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
487     0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
488     0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
489     0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
490     0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
491     0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
492     0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
493     0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
494     0xC400020F, 0x05060403, 0xBF810000
495 };
496 
497 static const uint32_t cached_cmd_gfx9[] = {
498 	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
499 	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
500 	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
501 	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
502 	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
503 	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
504 	0xc0026900, 0x292, 0x20, 0x60201b8,
505 	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
506 };
507 
508 unsigned int memcpy_ps_hang[] = {
509         0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
510         0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
511         0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
512         0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
513         0xF800180F, 0x03020100, 0xBF810000
514 };
515 
516 struct amdgpu_test_shader {
517 	uint32_t *shader;
518 	uint32_t header_length;
519 	uint32_t body_length;
520 	uint32_t foot_length;
521 };
522 
523 unsigned int memcpy_cs_hang_slow_ai_codes[] = {
524     0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
525     0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
526 };
527 
528 struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
529         memcpy_cs_hang_slow_ai_codes,
530         4,
531         3,
532         1
533 };
534 
535 unsigned int memcpy_cs_hang_slow_rv_codes[] = {
536     0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
537     0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
538 };
539 
540 struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
541         memcpy_cs_hang_slow_rv_codes,
542         4,
543         3,
544         1
545 };
546 
547 unsigned int memcpy_ps_hang_slow_ai_codes[] = {
548         0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
549         0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
550         0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
551         0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
552         0x03020100, 0xbf810000
553 };
554 
555 struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
556         memcpy_ps_hang_slow_ai_codes,
557         7,
558         2,
559         9
560 };
561 
/**
 * Allocate a buffer object and make it usable from both the GPU and the CPU.
 *
 * The function performs four steps in order: allocate the BO, reserve a GPU
 * virtual address range, map the BO at that address (readable, writeable and
 * executable, plus any caller-supplied @mapping_flags), and map the BO into
 * the CPU address space.
 *
 * @dev:           device to allocate on
 * @size:          allocation size in bytes (the GPU mapping covers the size
 *                 rounded up to the page size)
 * @alignment:     physical / VA alignment passed to the allocators
 * @heap:          preferred heap (stored in request.preferred_heap)
 * @alloc_flags:   allocation flags (stored in request.flags)
 * @mapping_flags: extra AMDGPU_VM_* flags OR-ed into the GPU mapping
 * @bo:            out: the allocated buffer object
 * @cpu:           out: CPU pointer to the buffer
 * @mc_address:    out: GPU virtual address of the buffer
 * @va_handle:     out: handle of the reserved VA range
 *
 * Returns 0 on success. On failure the error code of the failing step is
 * returned, every step that had already succeeded is undone, and @bo,
 * @mc_address and @va_handle are left unmodified.
 */
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

	/*
	 * Unwind in reverse order. Each label undoes the steps that succeeded
	 * BEFORE the step that failed; the failed step itself has nothing to
	 * undo.
	 */
 error_cpu_map:
	/* CPU map failed: the GPU VA mapping exists and must be removed. */
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_map:
	/* GPU map failed (or was just undone): release the reserved VA range. */
	amdgpu_va_range_free(handle);

 error_va_alloc:
	/* VA reservation failed (or was just released): free the BO. */
	amdgpu_bo_free(buf_handle);
	return r;
}
619 
620 
621 
suite_basic_tests_enable(void)622 CU_BOOL suite_basic_tests_enable(void)
623 {
624 
625 	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
626 					     &minor_version, &device_handle))
627 		return CU_FALSE;
628 
629 
630 	family_id = device_handle->info.family_id;
631 	chip_id = device_handle->info.chip_external_rev;
632 	chip_rev = device_handle->info.chip_rev;
633 
634 	if (amdgpu_device_deinitialize(device_handle))
635 		return CU_FALSE;
636 
637 	/* disable gfx engine basic test cases for some asics have no CPG */
638 	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
639 		if (amdgpu_set_test_active("Basic Tests",
640 					"Command submission Test (GFX)",
641 					CU_FALSE))
642 			fprintf(stderr, "test deactivation failed - %s\n",
643 				CU_get_error_msg());
644 
645 		if (amdgpu_set_test_active("Basic Tests",
646 					"Command submission Test (Multi-Fence)",
647 					CU_FALSE))
648 			fprintf(stderr, "test deactivation failed - %s\n",
649 				CU_get_error_msg());
650 
651 		if (amdgpu_set_test_active("Basic Tests",
652 					"Sync dependency Test",
653 					CU_FALSE))
654 			fprintf(stderr, "test deactivation failed - %s\n",
655 				CU_get_error_msg());
656 	}
657 
658 	return CU_TRUE;
659 }
660 
suite_basic_tests_init(void)661 int suite_basic_tests_init(void)
662 {
663 	struct amdgpu_gpu_info gpu_info = {0};
664 	int r;
665 
666 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
667 				   &minor_version, &device_handle);
668 
669 	if (r) {
670 		if ((r == -EACCES) && (errno == EACCES))
671 			printf("\n\nError:%s. "
672 				"Hint:Try to run this test program as root.",
673 				strerror(errno));
674 		return CUE_SINIT_FAILED;
675 	}
676 
677 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
678 	if (r)
679 		return CUE_SINIT_FAILED;
680 
681 	family_id = gpu_info.family_id;
682 
683 	return CUE_SUCCESS;
684 }
685 
suite_basic_tests_clean(void)686 int suite_basic_tests_clean(void)
687 {
688 	int r = amdgpu_device_deinitialize(device_handle);
689 
690 	if (r == 0)
691 		return CUE_SUCCESS;
692 	else
693 		return CUE_SCLEAN_FAILED;
694 }
695 
amdgpu_query_info_test(void)696 static void amdgpu_query_info_test(void)
697 {
698 	struct amdgpu_gpu_info gpu_info = {0};
699 	uint32_t version, feature;
700 	int r;
701 
702 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
703 	CU_ASSERT_EQUAL(r, 0);
704 
705 	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
706 					  0, &version, &feature);
707 	CU_ASSERT_EQUAL(r, 0);
708 }
709 
amdgpu_command_submission_gfx_separate_ibs(void)710 static void amdgpu_command_submission_gfx_separate_ibs(void)
711 {
712 	amdgpu_context_handle context_handle;
713 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
714 	void *ib_result_cpu, *ib_result_ce_cpu;
715 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
716 	struct amdgpu_cs_request ibs_request = {0};
717 	struct amdgpu_cs_ib_info ib_info[2];
718 	struct amdgpu_cs_fence fence_status = {0};
719 	uint32_t *ptr;
720 	uint32_t expired;
721 	amdgpu_bo_list_handle bo_list;
722 	amdgpu_va_handle va_handle, va_handle_ce;
723 	int r, i = 0;
724 
725 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
726 	CU_ASSERT_EQUAL(r, 0);
727 
728 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
729 				    AMDGPU_GEM_DOMAIN_GTT, 0,
730 				    &ib_result_handle, &ib_result_cpu,
731 				    &ib_result_mc_address, &va_handle);
732 	CU_ASSERT_EQUAL(r, 0);
733 
734 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
735 				    AMDGPU_GEM_DOMAIN_GTT, 0,
736 				    &ib_result_ce_handle, &ib_result_ce_cpu,
737 				    &ib_result_ce_mc_address, &va_handle_ce);
738 	CU_ASSERT_EQUAL(r, 0);
739 
740 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
741 			       ib_result_ce_handle, &bo_list);
742 	CU_ASSERT_EQUAL(r, 0);
743 
744 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
745 
746 	/* IT_SET_CE_DE_COUNTERS */
747 	ptr = ib_result_ce_cpu;
748 	if (family_id != AMDGPU_FAMILY_SI) {
749 		ptr[i++] = 0xc0008900;
750 		ptr[i++] = 0;
751 	}
752 	ptr[i++] = 0xc0008400;
753 	ptr[i++] = 1;
754 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
755 	ib_info[0].size = i;
756 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
757 
758 	/* IT_WAIT_ON_CE_COUNTER */
759 	ptr = ib_result_cpu;
760 	ptr[0] = 0xc0008600;
761 	ptr[1] = 0x00000001;
762 	ib_info[1].ib_mc_address = ib_result_mc_address;
763 	ib_info[1].size = 2;
764 
765 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
766 	ibs_request.number_of_ibs = 2;
767 	ibs_request.ibs = ib_info;
768 	ibs_request.resources = bo_list;
769 	ibs_request.fence_info.handle = NULL;
770 
771 	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
772 
773 	CU_ASSERT_EQUAL(r, 0);
774 
775 	fence_status.context = context_handle;
776 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
777 	fence_status.ip_instance = 0;
778 	fence_status.fence = ibs_request.seq_no;
779 
780 	r = amdgpu_cs_query_fence_status(&fence_status,
781 					 AMDGPU_TIMEOUT_INFINITE,
782 					 0, &expired);
783 	CU_ASSERT_EQUAL(r, 0);
784 
785 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
786 				     ib_result_mc_address, 4096);
787 	CU_ASSERT_EQUAL(r, 0);
788 
789 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
790 				     ib_result_ce_mc_address, 4096);
791 	CU_ASSERT_EQUAL(r, 0);
792 
793 	r = amdgpu_bo_list_destroy(bo_list);
794 	CU_ASSERT_EQUAL(r, 0);
795 
796 	r = amdgpu_cs_ctx_free(context_handle);
797 	CU_ASSERT_EQUAL(r, 0);
798 
799 }
800 
amdgpu_command_submission_gfx_shared_ib(void)801 static void amdgpu_command_submission_gfx_shared_ib(void)
802 {
803 	amdgpu_context_handle context_handle;
804 	amdgpu_bo_handle ib_result_handle;
805 	void *ib_result_cpu;
806 	uint64_t ib_result_mc_address;
807 	struct amdgpu_cs_request ibs_request = {0};
808 	struct amdgpu_cs_ib_info ib_info[2];
809 	struct amdgpu_cs_fence fence_status = {0};
810 	uint32_t *ptr;
811 	uint32_t expired;
812 	amdgpu_bo_list_handle bo_list;
813 	amdgpu_va_handle va_handle;
814 	int r, i = 0;
815 
816 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
817 	CU_ASSERT_EQUAL(r, 0);
818 
819 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
820 				    AMDGPU_GEM_DOMAIN_GTT, 0,
821 				    &ib_result_handle, &ib_result_cpu,
822 				    &ib_result_mc_address, &va_handle);
823 	CU_ASSERT_EQUAL(r, 0);
824 
825 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
826 			       &bo_list);
827 	CU_ASSERT_EQUAL(r, 0);
828 
829 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
830 
831 	/* IT_SET_CE_DE_COUNTERS */
832 	ptr = ib_result_cpu;
833 	if (family_id != AMDGPU_FAMILY_SI) {
834 		ptr[i++] = 0xc0008900;
835 		ptr[i++] = 0;
836 	}
837 	ptr[i++] = 0xc0008400;
838 	ptr[i++] = 1;
839 	ib_info[0].ib_mc_address = ib_result_mc_address;
840 	ib_info[0].size = i;
841 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
842 
843 	ptr = (uint32_t *)ib_result_cpu + 4;
844 	ptr[0] = 0xc0008600;
845 	ptr[1] = 0x00000001;
846 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
847 	ib_info[1].size = 2;
848 
849 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
850 	ibs_request.number_of_ibs = 2;
851 	ibs_request.ibs = ib_info;
852 	ibs_request.resources = bo_list;
853 	ibs_request.fence_info.handle = NULL;
854 
855 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
856 
857 	CU_ASSERT_EQUAL(r, 0);
858 
859 	fence_status.context = context_handle;
860 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
861 	fence_status.ip_instance = 0;
862 	fence_status.fence = ibs_request.seq_no;
863 
864 	r = amdgpu_cs_query_fence_status(&fence_status,
865 					 AMDGPU_TIMEOUT_INFINITE,
866 					 0, &expired);
867 	CU_ASSERT_EQUAL(r, 0);
868 
869 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
870 				     ib_result_mc_address, 4096);
871 	CU_ASSERT_EQUAL(r, 0);
872 
873 	r = amdgpu_bo_list_destroy(bo_list);
874 	CU_ASSERT_EQUAL(r, 0);
875 
876 	r = amdgpu_cs_ctx_free(context_handle);
877 	CU_ASSERT_EQUAL(r, 0);
878 }
879 
/*
 * GFX flavour of the write-linear test: forwards to the shared
 * write-linear helper with AMDGPU_HW_IP_GFX as the IP type.
 */
static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}
884 
/*
 * GFX flavour of the constant-fill test: forwards to the shared
 * const-fill helper with AMDGPU_HW_IP_GFX as the IP type.
 */
static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}
889 
/*
 * GFX flavour of the copy-linear test: forwards to the shared
 * copy-linear helper with AMDGPU_HW_IP_GFX as the IP type.
 */
static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
894 
amdgpu_bo_eviction_test(void)895 static void amdgpu_bo_eviction_test(void)
896 {
897 	const int sdma_write_length = 1024;
898 	const int pm4_dw = 256;
899 	amdgpu_context_handle context_handle;
900 	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
901 	amdgpu_bo_handle *resources;
902 	uint32_t *pm4;
903 	struct amdgpu_cs_ib_info *ib_info;
904 	struct amdgpu_cs_request *ibs_request;
905 	uint64_t bo1_mc, bo2_mc;
906 	volatile unsigned char *bo1_cpu, *bo2_cpu;
907 	int i, j, r, loop1, loop2;
908 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
909 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
910 	struct amdgpu_heap_info vram_info, gtt_info;
911 
912 	pm4 = calloc(pm4_dw, sizeof(*pm4));
913 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
914 
915 	ib_info = calloc(1, sizeof(*ib_info));
916 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
917 
918 	ibs_request = calloc(1, sizeof(*ibs_request));
919 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
920 
921 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
922 	CU_ASSERT_EQUAL(r, 0);
923 
924 	/* prepare resource */
925 	resources = calloc(4, sizeof(amdgpu_bo_handle));
926 	CU_ASSERT_NOT_EQUAL(resources, NULL);
927 
928 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
929 				   0, &vram_info);
930 	CU_ASSERT_EQUAL(r, 0);
931 
932 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
933 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
934 	CU_ASSERT_EQUAL(r, 0);
935 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
936 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
937 	CU_ASSERT_EQUAL(r, 0);
938 
939 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
940 				   0, >t_info);
941 	CU_ASSERT_EQUAL(r, 0);
942 
943 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
944 				 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[0]);
945 	CU_ASSERT_EQUAL(r, 0);
946 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
947 				 AMDGPU_GEM_DOMAIN_GTT, 0, >t_max[1]);
948 	CU_ASSERT_EQUAL(r, 0);
949 
950 
951 
952 	loop1 = loop2 = 0;
953 	/* run 9 circle to test all mapping combination */
954 	while(loop1 < 2) {
955 		while(loop2 < 2) {
956 			/* allocate UC bo1for sDMA use */
957 			r = amdgpu_bo_alloc_and_map(device_handle,
958 						    sdma_write_length, 4096,
959 						    AMDGPU_GEM_DOMAIN_GTT,
960 						    gtt_flags[loop1], &bo1,
961 						    (void**)&bo1_cpu, &bo1_mc,
962 						    &bo1_va_handle);
963 			CU_ASSERT_EQUAL(r, 0);
964 
965 			/* set bo1 */
966 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
967 
968 			/* allocate UC bo2 for sDMA use */
969 			r = amdgpu_bo_alloc_and_map(device_handle,
970 						    sdma_write_length, 4096,
971 						    AMDGPU_GEM_DOMAIN_GTT,
972 						    gtt_flags[loop2], &bo2,
973 						    (void**)&bo2_cpu, &bo2_mc,
974 						    &bo2_va_handle);
975 			CU_ASSERT_EQUAL(r, 0);
976 
977 			/* clear bo2 */
978 			memset((void*)bo2_cpu, 0, sdma_write_length);
979 
980 			resources[0] = bo1;
981 			resources[1] = bo2;
982 			resources[2] = vram_max[loop2];
983 			resources[3] = gtt_max[loop2];
984 
985 			/* fulfill PM4: test DMA copy linear */
986 			i = j = 0;
987 			if (family_id == AMDGPU_FAMILY_SI) {
988 				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
989 							  sdma_write_length);
990 				pm4[i++] = 0xffffffff & bo2_mc;
991 				pm4[i++] = 0xffffffff & bo1_mc;
992 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
993 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
994 			} else {
995 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
996 				if (family_id >= AMDGPU_FAMILY_AI)
997 					pm4[i++] = sdma_write_length - 1;
998 				else
999 					pm4[i++] = sdma_write_length;
1000 				pm4[i++] = 0;
1001 				pm4[i++] = 0xffffffff & bo1_mc;
1002 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1003 				pm4[i++] = 0xffffffff & bo2_mc;
1004 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1005 			}
1006 
1007 			amdgpu_test_exec_cs_helper(context_handle,
1008 						   AMDGPU_HW_IP_DMA, 0,
1009 						   i, pm4,
1010 						   4, resources,
1011 						   ib_info, ibs_request);
1012 
1013 			/* verify if SDMA test result meets with expected */
1014 			i = 0;
1015 			while(i < sdma_write_length) {
1016 				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1017 			}
1018 			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1019 						     sdma_write_length);
1020 			CU_ASSERT_EQUAL(r, 0);
1021 			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1022 						     sdma_write_length);
1023 			CU_ASSERT_EQUAL(r, 0);
1024 			loop2++;
1025 		}
1026 		loop2 = 0;
1027 		loop1++;
1028 	}
1029 	amdgpu_bo_free(vram_max[0]);
1030 	amdgpu_bo_free(vram_max[1]);
1031 	amdgpu_bo_free(gtt_max[0]);
1032 	amdgpu_bo_free(gtt_max[1]);
1033 	/* clean resources */
1034 	free(resources);
1035 	free(ibs_request);
1036 	free(ib_info);
1037 	free(pm4);
1038 
1039 	/* end of test */
1040 	r = amdgpu_cs_ctx_free(context_handle);
1041 	CU_ASSERT_EQUAL(r, 0);
1042 }
1043 
1044 
amdgpu_command_submission_gfx(void)1045 static void amdgpu_command_submission_gfx(void)
1046 {
1047 	/* write data using the CP */
1048 	amdgpu_command_submission_gfx_cp_write_data();
1049 	/* const fill using the CP */
1050 	amdgpu_command_submission_gfx_cp_const_fill();
1051 	/* copy data using the CP */
1052 	amdgpu_command_submission_gfx_cp_copy_data();
1053 	/* separate IB buffers for multi-IB submission */
1054 	amdgpu_command_submission_gfx_separate_ibs();
1055 	/* shared IB buffer for multi-IB submission */
1056 	amdgpu_command_submission_gfx_shared_ib();
1057 }
1058 
amdgpu_semaphore_test(void)1059 static void amdgpu_semaphore_test(void)
1060 {
1061 	amdgpu_context_handle context_handle[2];
1062 	amdgpu_semaphore_handle sem;
1063 	amdgpu_bo_handle ib_result_handle[2];
1064 	void *ib_result_cpu[2];
1065 	uint64_t ib_result_mc_address[2];
1066 	struct amdgpu_cs_request ibs_request[2] = {0};
1067 	struct amdgpu_cs_ib_info ib_info[2] = {0};
1068 	struct amdgpu_cs_fence fence_status = {0};
1069 	uint32_t *ptr;
1070 	uint32_t expired;
1071 	uint32_t sdma_nop, gfx_nop;
1072 	amdgpu_bo_list_handle bo_list[2];
1073 	amdgpu_va_handle va_handle[2];
1074 	int r, i;
1075 	struct amdgpu_gpu_info gpu_info = {0};
1076 	unsigned gc_ip_type;
1077 
1078 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
1079 	CU_ASSERT_EQUAL(r, 0);
1080 
1081 	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
1082 			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
1083 
1084 	if (family_id == AMDGPU_FAMILY_SI) {
1085 		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1086 		gfx_nop = GFX_COMPUTE_NOP_SI;
1087 	} else {
1088 		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1089 		gfx_nop = GFX_COMPUTE_NOP;
1090 	}
1091 
1092 	r = amdgpu_cs_create_semaphore(&sem);
1093 	CU_ASSERT_EQUAL(r, 0);
1094 	for (i = 0; i < 2; i++) {
1095 		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1096 		CU_ASSERT_EQUAL(r, 0);
1097 
1098 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1099 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1100 					    &ib_result_handle[i], &ib_result_cpu[i],
1101 					    &ib_result_mc_address[i], &va_handle[i]);
1102 		CU_ASSERT_EQUAL(r, 0);
1103 
1104 		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1105 				       NULL, &bo_list[i]);
1106 		CU_ASSERT_EQUAL(r, 0);
1107 	}
1108 
1109 	/* 1. same context different engine */
1110 	ptr = ib_result_cpu[0];
1111 	ptr[0] = sdma_nop;
1112 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1113 	ib_info[0].size = 1;
1114 
1115 	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1116 	ibs_request[0].number_of_ibs = 1;
1117 	ibs_request[0].ibs = &ib_info[0];
1118 	ibs_request[0].resources = bo_list[0];
1119 	ibs_request[0].fence_info.handle = NULL;
1120 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1121 	CU_ASSERT_EQUAL(r, 0);
1122 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1123 	CU_ASSERT_EQUAL(r, 0);
1124 
1125 	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1126 	CU_ASSERT_EQUAL(r, 0);
1127 	ptr = ib_result_cpu[1];
1128 	ptr[0] = gfx_nop;
1129 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1130 	ib_info[1].size = 1;
1131 
1132 	ibs_request[1].ip_type = gc_ip_type;
1133 	ibs_request[1].number_of_ibs = 1;
1134 	ibs_request[1].ibs = &ib_info[1];
1135 	ibs_request[1].resources = bo_list[1];
1136 	ibs_request[1].fence_info.handle = NULL;
1137 
1138 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1139 	CU_ASSERT_EQUAL(r, 0);
1140 
1141 	fence_status.context = context_handle[0];
1142 	fence_status.ip_type = gc_ip_type;
1143 	fence_status.ip_instance = 0;
1144 	fence_status.fence = ibs_request[1].seq_no;
1145 	r = amdgpu_cs_query_fence_status(&fence_status,
1146 					 500000000, 0, &expired);
1147 	CU_ASSERT_EQUAL(r, 0);
1148 	CU_ASSERT_EQUAL(expired, true);
1149 
1150 	/* 2. same engine different context */
1151 	ptr = ib_result_cpu[0];
1152 	ptr[0] = gfx_nop;
1153 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1154 	ib_info[0].size = 1;
1155 
1156 	ibs_request[0].ip_type = gc_ip_type;
1157 	ibs_request[0].number_of_ibs = 1;
1158 	ibs_request[0].ibs = &ib_info[0];
1159 	ibs_request[0].resources = bo_list[0];
1160 	ibs_request[0].fence_info.handle = NULL;
1161 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1162 	CU_ASSERT_EQUAL(r, 0);
1163 	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1164 	CU_ASSERT_EQUAL(r, 0);
1165 
1166 	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
1167 	CU_ASSERT_EQUAL(r, 0);
1168 	ptr = ib_result_cpu[1];
1169 	ptr[0] = gfx_nop;
1170 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1171 	ib_info[1].size = 1;
1172 
1173 	ibs_request[1].ip_type = gc_ip_type;
1174 	ibs_request[1].number_of_ibs = 1;
1175 	ibs_request[1].ibs = &ib_info[1];
1176 	ibs_request[1].resources = bo_list[1];
1177 	ibs_request[1].fence_info.handle = NULL;
1178 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1179 
1180 	CU_ASSERT_EQUAL(r, 0);
1181 
1182 	fence_status.context = context_handle[1];
1183 	fence_status.ip_type = gc_ip_type;
1184 	fence_status.ip_instance = 0;
1185 	fence_status.fence = ibs_request[1].seq_no;
1186 	r = amdgpu_cs_query_fence_status(&fence_status,
1187 					 500000000, 0, &expired);
1188 	CU_ASSERT_EQUAL(r, 0);
1189 	CU_ASSERT_EQUAL(expired, true);
1190 
1191 	for (i = 0; i < 2; i++) {
1192 		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1193 					     ib_result_mc_address[i], 4096);
1194 		CU_ASSERT_EQUAL(r, 0);
1195 
1196 		r = amdgpu_bo_list_destroy(bo_list[i]);
1197 		CU_ASSERT_EQUAL(r, 0);
1198 
1199 		r = amdgpu_cs_ctx_free(context_handle[i]);
1200 		CU_ASSERT_EQUAL(r, 0);
1201 	}
1202 
1203 	r = amdgpu_cs_destroy_semaphore(sem);
1204 	CU_ASSERT_EQUAL(r, 0);
1205 }
1206 
amdgpu_command_submission_compute_nop(void)1207 static void amdgpu_command_submission_compute_nop(void)
1208 {
1209 	amdgpu_context_handle context_handle;
1210 	amdgpu_bo_handle ib_result_handle;
1211 	void *ib_result_cpu;
1212 	uint64_t ib_result_mc_address;
1213 	struct amdgpu_cs_request ibs_request;
1214 	struct amdgpu_cs_ib_info ib_info;
1215 	struct amdgpu_cs_fence fence_status;
1216 	uint32_t *ptr;
1217 	uint32_t expired;
1218 	int r, instance;
1219 	amdgpu_bo_list_handle bo_list;
1220 	amdgpu_va_handle va_handle;
1221 	struct drm_amdgpu_info_hw_ip info;
1222 
1223 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1224 	CU_ASSERT_EQUAL(r, 0);
1225 
1226 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1227 	CU_ASSERT_EQUAL(r, 0);
1228 
1229 	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1230 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1231 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1232 					    &ib_result_handle, &ib_result_cpu,
1233 					    &ib_result_mc_address, &va_handle);
1234 		CU_ASSERT_EQUAL(r, 0);
1235 
1236 		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1237 				       &bo_list);
1238 		CU_ASSERT_EQUAL(r, 0);
1239 
1240 		ptr = ib_result_cpu;
1241 		memset(ptr, 0, 16);
1242 		ptr[0]=PACKET3(PACKET3_NOP, 14);
1243 
1244 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1245 		ib_info.ib_mc_address = ib_result_mc_address;
1246 		ib_info.size = 16;
1247 
1248 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1249 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1250 		ibs_request.ring = instance;
1251 		ibs_request.number_of_ibs = 1;
1252 		ibs_request.ibs = &ib_info;
1253 		ibs_request.resources = bo_list;
1254 		ibs_request.fence_info.handle = NULL;
1255 
1256 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1257 		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1258 		CU_ASSERT_EQUAL(r, 0);
1259 
1260 		fence_status.context = context_handle;
1261 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1262 		fence_status.ip_instance = 0;
1263 		fence_status.ring = instance;
1264 		fence_status.fence = ibs_request.seq_no;
1265 
1266 		r = amdgpu_cs_query_fence_status(&fence_status,
1267 						 AMDGPU_TIMEOUT_INFINITE,
1268 						 0, &expired);
1269 		CU_ASSERT_EQUAL(r, 0);
1270 
1271 		r = amdgpu_bo_list_destroy(bo_list);
1272 		CU_ASSERT_EQUAL(r, 0);
1273 
1274 		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1275 					     ib_result_mc_address, 4096);
1276 		CU_ASSERT_EQUAL(r, 0);
1277 	}
1278 
1279 	r = amdgpu_cs_ctx_free(context_handle);
1280 	CU_ASSERT_EQUAL(r, 0);
1281 }
1282 
/*
 * Compute flavour of the write-linear test: forwards to the shared
 * write-linear helper with AMDGPU_HW_IP_COMPUTE as the IP type.
 */
static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1287 
/*
 * Compute flavour of the constant-fill test: forwards to the shared
 * const-fill helper with AMDGPU_HW_IP_COMPUTE as the IP type.
 */
static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}
1292 
/*
 * Compute flavour of the copy-linear test: forwards to the shared
 * copy-linear helper with AMDGPU_HW_IP_COMPUTE as the IP type.
 */
static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}
1297 
amdgpu_command_submission_compute(void)1298 static void amdgpu_command_submission_compute(void)
1299 {
1300 	/* write data using the CP */
1301 	amdgpu_command_submission_compute_cp_write_data();
1302 	/* const fill using the CP */
1303 	amdgpu_command_submission_compute_cp_const_fill();
1304 	/* copy data using the CP */
1305 	amdgpu_command_submission_compute_cp_copy_data();
1306 	/* nop test */
1307 	amdgpu_command_submission_compute_nop();
1308 }
1309 
1310 /*
1311  * caller need create/release:
1312  * pm4_src, resources, ib_info, and ibs_request
1313  * submit command stream described in ibs_request and wait for this IB accomplished
1314  */
1315 void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request,bool secure)1316 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1317 			       amdgpu_context_handle context_handle,
1318 			       unsigned ip_type, int instance, int pm4_dw,
1319 			       uint32_t *pm4_src, int res_cnt,
1320 			       amdgpu_bo_handle *resources,
1321 			       struct amdgpu_cs_ib_info *ib_info,
1322 			       struct amdgpu_cs_request *ibs_request,
1323 			       bool secure)
1324 {
1325 	int r;
1326 	uint32_t expired;
1327 	uint32_t *ring_ptr;
1328 	amdgpu_bo_handle ib_result_handle;
1329 	void *ib_result_cpu;
1330 	uint64_t ib_result_mc_address;
1331 	struct amdgpu_cs_fence fence_status = {0};
1332 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1333 	amdgpu_va_handle va_handle;
1334 
1335 	/* prepare CS */
1336 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1337 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1338 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1339 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1340 	CU_ASSERT_TRUE(pm4_dw <= 1024);
1341 
1342 	/* allocate IB */
1343 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1344 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1345 				    &ib_result_handle, &ib_result_cpu,
1346 				    &ib_result_mc_address, &va_handle);
1347 	CU_ASSERT_EQUAL(r, 0);
1348 
1349 	/* copy PM4 packet to ring from caller */
1350 	ring_ptr = ib_result_cpu;
1351 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1352 
1353 	ib_info->ib_mc_address = ib_result_mc_address;
1354 	ib_info->size = pm4_dw;
1355 	if (secure)
1356 		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1357 
1358 	ibs_request->ip_type = ip_type;
1359 	ibs_request->ring = instance;
1360 	ibs_request->number_of_ibs = 1;
1361 	ibs_request->ibs = ib_info;
1362 	ibs_request->fence_info.handle = NULL;
1363 
1364 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1365 	all_res[res_cnt] = ib_result_handle;
1366 
1367 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1368 				  NULL, &ibs_request->resources);
1369 	CU_ASSERT_EQUAL(r, 0);
1370 
1371 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1372 
1373 	/* submit CS */
1374 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1375 	CU_ASSERT_EQUAL(r, 0);
1376 
1377 	r = amdgpu_bo_list_destroy(ibs_request->resources);
1378 	CU_ASSERT_EQUAL(r, 0);
1379 
1380 	fence_status.ip_type = ip_type;
1381 	fence_status.ip_instance = 0;
1382 	fence_status.ring = ibs_request->ring;
1383 	fence_status.context = context_handle;
1384 	fence_status.fence = ibs_request->seq_no;
1385 
1386 	/* wait for IB accomplished */
1387 	r = amdgpu_cs_query_fence_status(&fence_status,
1388 					 AMDGPU_TIMEOUT_INFINITE,
1389 					 0, &expired);
1390 	CU_ASSERT_EQUAL(r, 0);
1391 	CU_ASSERT_EQUAL(expired, true);
1392 
1393 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1394 				     ib_result_mc_address, 4096);
1395 	CU_ASSERT_EQUAL(r, 0);
1396 }
1397 
/*
 * Convenience form of amdgpu_test_exec_cs_helper_raw(): forwards all
 * arguments unchanged, supplying the file-level device_handle as the
 * device and false for the secure flag.
 */
static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}
1411 
/*
 * Write-linear test shared by the DMA, GFX and compute front ends, with an
 * optional "secure" mode.
 *
 * For every ring reported in hw_ip_info.available_rings (probed from bit 0
 * until the first clear bit) and for both GTT mapping flavours in
 * gtt_flags, a 128-dword BO is allocated, zeroed by the CPU and then
 * written with 0xdeadbeaf by a packet submitted on ip_type.
 *
 * device:  device used for every allocation, query and submission
 * ip_type: AMDGPU_HW_IP_DMA, AMDGPU_HW_IP_GFX or AMDGPU_HW_IP_COMPUTE; any
 *          other value leaves the PM4 stream empty (i stays 0)
 * secure:  when true, AMDGPU_GEM_CREATE_ENCRYPTED is OR-ed into both flag
 *          variants and the submissions are made with the secure flag set
 *
 * Verification depends on the mode:
 *   - !secure:      the CPU reads the BO back and expects 0xdeadbeaf in
 *                   every dword;
 *   - secure, GFX:  one ATOMIC_MEM compare-and-swap packet is submitted;
 *                   no CPU read-back follows;
 *   - secure, DMA:  two SDMA atomic compare-and-swap packets are
 *                   submitted; bo_cpu[0] must change after the first and
 *                   stay unchanged after the second.
 */
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
							  device, unsigned
							  ip_type, bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	/* in secure mode both mapping flavours additionally request an encrypted bo */
	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);

	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource: a single-entry list holding the destination bo */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	/* walk the available rings from bit 0 up to the first clear bit */
	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		/* loop indexes gtt_flags: 0 = default mapping, 1 = USWC mapping */
		while(loop < 2) {
			/* allocate and CPU-map the destination bo in GTT */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill in the PM4 stream: linear write of 0xdeadbeaf dwords */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/*
				 * Separate length dword: length - 1 from AI on, the plain
				 * length on earlier non-SI families, none at all on SI
				 * (there the length is passed to SDMA_PACKET_SI above).
				 */
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				/* CP path: WRITE_DATA header, control, 64-bit address, payload */
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while(j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify that the write produced the expected result */
			i = 0;
			if (!secure) {
				/* non-secure: read every dword back through the CPU mapping */
				while(i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * command, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 * engine_sel, 0-micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
							ATOMIC_MEM_COMMAND(1) |
							ATOMIC_MEM_CACHEPOLICAY(0) |
							ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				/*
				 * NOTE(review): the remaining dwords look like source data,
				 * compare data and a loop interval - confirm against the
				 * packet specification.
				 */
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* save bo_cpu[0] so it can be compared after the atomic */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * loop, 1-loop_until_compare_satisfied.
				 * single_pass_atomic, 0-lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
							       0,
							       SDMA_ATOMIC_LOOP(1) |
							       SDMA_ATOMIC_TMZ(1) |
							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
				/* DMA's atomic behavior is unlike GFX.
				 * If the compare data is not equal to the destination data:
				 * for GFX, it loops again until gfx timeout (system hang);
				 * for DMA, it loops again until a timer expires and then
				 * sends an interrupt.
				 * So the testcase can't use the interrupt mechanism.
				 * We verify another way: when the compare data matches the
				 * destination data, the source data overwrites the
				 * destination buffer; otherwise the original destination
				 * data is left unchanged.
				 * So if the bo_cpu data is overwritten, the result is passed.
				 */
				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);

				/* compare again for the case of dest_data != cmp_data */
				i = 0;
				/* save bo_cpu[0] again; it now holds the result of the first atomic */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
							       0,
							       SDMA_ATOMIC_LOOP(1) |
							       SDMA_ATOMIC_TMZ(1) |
							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x87654321;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							ip_type, ring_id, i, pm4,
							1, resources, ib_info,
							ibs_request, true);
				/* here bo_cpu[0] should be unchanged, still 0x12345678; otherwise the test fails */
				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
1611 
/*
 * Non-secure write-linear test on ip_type: forwards to
 * amdgpu_command_submission_write_linear_helper_with_secure() with the
 * file-level device_handle and secure = false.
 */
static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
								  ip_type,
								  false);
}
1618 
/*
 * SDMA flavour of the write-linear test: forwards to the shared
 * write-linear helper with AMDGPU_HW_IP_DMA as the IP type.
 */
static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}
1623 
amdgpu_command_submission_const_fill_helper(unsigned ip_type)1624 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1625 {
1626 	const int sdma_write_length = 1024 * 1024;
1627 	const int pm4_dw = 256;
1628 	amdgpu_context_handle context_handle;
1629 	amdgpu_bo_handle bo;
1630 	amdgpu_bo_handle *resources;
1631 	uint32_t *pm4;
1632 	struct amdgpu_cs_ib_info *ib_info;
1633 	struct amdgpu_cs_request *ibs_request;
1634 	uint64_t bo_mc;
1635 	volatile uint32_t *bo_cpu;
1636 	int i, j, r, loop, ring_id;
1637 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1638 	amdgpu_va_handle va_handle;
1639 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1640 
1641 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1642 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1643 
1644 	ib_info = calloc(1, sizeof(*ib_info));
1645 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1646 
1647 	ibs_request = calloc(1, sizeof(*ibs_request));
1648 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1649 
1650 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1651 	CU_ASSERT_EQUAL(r, 0);
1652 
1653 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1654 	CU_ASSERT_EQUAL(r, 0);
1655 
1656 	/* prepare resource */
1657 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1658 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1659 
1660 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1661 		loop = 0;
1662 		while(loop < 2) {
1663 			/* allocate UC bo for sDMA use */
1664 			r = amdgpu_bo_alloc_and_map(device_handle,
1665 						    sdma_write_length, 4096,
1666 						    AMDGPU_GEM_DOMAIN_GTT,
1667 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1668 						    &bo_mc, &va_handle);
1669 			CU_ASSERT_EQUAL(r, 0);
1670 
1671 			/* clear bo */
1672 			memset((void*)bo_cpu, 0, sdma_write_length);
1673 
1674 			resources[0] = bo;
1675 
1676 			/* fulfill PM4: test DMA const fill */
1677 			i = j = 0;
1678 			if (ip_type == AMDGPU_HW_IP_DMA) {
1679 				if (family_id == AMDGPU_FAMILY_SI) {
1680 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1681 								  0, 0, 0,
1682 								  sdma_write_length / 4);
1683 					pm4[i++] = 0xfffffffc & bo_mc;
1684 					pm4[i++] = 0xdeadbeaf;
1685 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1686 				} else {
1687 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1688 							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1689 					pm4[i++] = 0xffffffff & bo_mc;
1690 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1691 					pm4[i++] = 0xdeadbeaf;
1692 					if (family_id >= AMDGPU_FAMILY_AI)
1693 						pm4[i++] = sdma_write_length - 1;
1694 					else
1695 						pm4[i++] = sdma_write_length;
1696 				}
1697 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1698 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1699 				if (family_id == AMDGPU_FAMILY_SI) {
1700 					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1701 					pm4[i++] = 0xdeadbeaf;
1702 					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1703 						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1704 						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1705 						   PACKET3_DMA_DATA_SI_CP_SYNC;
1706 					pm4[i++] = 0xffffffff & bo_mc;
1707 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1708 					pm4[i++] = sdma_write_length;
1709 				} else {
1710 					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1711 					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1712 						   PACKET3_DMA_DATA_DST_SEL(0) |
1713 						   PACKET3_DMA_DATA_SRC_SEL(2) |
1714 						   PACKET3_DMA_DATA_CP_SYNC;
1715 					pm4[i++] = 0xdeadbeaf;
1716 					pm4[i++] = 0;
1717 					pm4[i++] = 0xfffffffc & bo_mc;
1718 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1719 					pm4[i++] = sdma_write_length;
1720 				}
1721 			}
1722 
1723 			amdgpu_test_exec_cs_helper(context_handle,
1724 						   ip_type, ring_id,
1725 						   i, pm4,
1726 						   1, resources,
1727 						   ib_info, ibs_request);
1728 
1729 			/* verify if SDMA test result meets with expected */
1730 			i = 0;
1731 			while(i < (sdma_write_length / 4)) {
1732 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1733 			}
1734 
1735 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1736 						     sdma_write_length);
1737 			CU_ASSERT_EQUAL(r, 0);
1738 			loop++;
1739 		}
1740 	}
1741 	/* clean resources */
1742 	free(resources);
1743 	free(ibs_request);
1744 	free(ib_info);
1745 	free(pm4);
1746 
1747 	/* end of test */
1748 	r = amdgpu_cs_ctx_free(context_handle);
1749 	CU_ASSERT_EQUAL(r, 0);
1750 }
1751 
/* Run the constant-fill submission helper with ip_type AMDGPU_HW_IP_DMA. */
static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1756 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1757 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1758 {
1759 	const int sdma_write_length = 1024;
1760 	const int pm4_dw = 256;
1761 	amdgpu_context_handle context_handle;
1762 	amdgpu_bo_handle bo1, bo2;
1763 	amdgpu_bo_handle *resources;
1764 	uint32_t *pm4;
1765 	struct amdgpu_cs_ib_info *ib_info;
1766 	struct amdgpu_cs_request *ibs_request;
1767 	uint64_t bo1_mc, bo2_mc;
1768 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1769 	int i, j, r, loop1, loop2, ring_id;
1770 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1771 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1772 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1773 
1774 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1775 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1776 
1777 	ib_info = calloc(1, sizeof(*ib_info));
1778 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1779 
1780 	ibs_request = calloc(1, sizeof(*ibs_request));
1781 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1782 
1783 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1784 	CU_ASSERT_EQUAL(r, 0);
1785 
1786 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1787 	CU_ASSERT_EQUAL(r, 0);
1788 
1789 	/* prepare resource */
1790 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1791 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1792 
1793 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1794 		loop1 = loop2 = 0;
1795 		/* run 9 circle to test all mapping combination */
1796 		while(loop1 < 2) {
1797 			while(loop2 < 2) {
1798 				/* allocate UC bo1for sDMA use */
1799 				r = amdgpu_bo_alloc_and_map(device_handle,
1800 							    sdma_write_length, 4096,
1801 							    AMDGPU_GEM_DOMAIN_GTT,
1802 							    gtt_flags[loop1], &bo1,
1803 							    (void**)&bo1_cpu, &bo1_mc,
1804 							    &bo1_va_handle);
1805 				CU_ASSERT_EQUAL(r, 0);
1806 
1807 				/* set bo1 */
1808 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1809 
1810 				/* allocate UC bo2 for sDMA use */
1811 				r = amdgpu_bo_alloc_and_map(device_handle,
1812 							    sdma_write_length, 4096,
1813 							    AMDGPU_GEM_DOMAIN_GTT,
1814 							    gtt_flags[loop2], &bo2,
1815 							    (void**)&bo2_cpu, &bo2_mc,
1816 							    &bo2_va_handle);
1817 				CU_ASSERT_EQUAL(r, 0);
1818 
1819 				/* clear bo2 */
1820 				memset((void*)bo2_cpu, 0, sdma_write_length);
1821 
1822 				resources[0] = bo1;
1823 				resources[1] = bo2;
1824 
1825 				/* fulfill PM4: test DMA copy linear */
1826 				i = j = 0;
1827 				if (ip_type == AMDGPU_HW_IP_DMA) {
1828 					if (family_id == AMDGPU_FAMILY_SI) {
1829 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1830 									  0, 0, 0,
1831 									  sdma_write_length);
1832 						pm4[i++] = 0xffffffff & bo2_mc;
1833 						pm4[i++] = 0xffffffff & bo1_mc;
1834 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1835 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1836 					} else {
1837 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1838 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1839 								       0);
1840 						if (family_id >= AMDGPU_FAMILY_AI)
1841 							pm4[i++] = sdma_write_length - 1;
1842 						else
1843 							pm4[i++] = sdma_write_length;
1844 						pm4[i++] = 0;
1845 						pm4[i++] = 0xffffffff & bo1_mc;
1846 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1847 						pm4[i++] = 0xffffffff & bo2_mc;
1848 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1849 					}
1850 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1851 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1852 					if (family_id == AMDGPU_FAMILY_SI) {
1853 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1854 						pm4[i++] = 0xfffffffc & bo1_mc;
1855 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1856 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1857 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1858 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1859 							   (0xffff00000000 & bo1_mc) >> 32;
1860 						pm4[i++] = 0xfffffffc & bo2_mc;
1861 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1862 						pm4[i++] = sdma_write_length;
1863 					} else {
1864 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1865 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1866 							   PACKET3_DMA_DATA_DST_SEL(0) |
1867 							   PACKET3_DMA_DATA_SRC_SEL(0) |
1868 							   PACKET3_DMA_DATA_CP_SYNC;
1869 						pm4[i++] = 0xfffffffc & bo1_mc;
1870 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1871 						pm4[i++] = 0xfffffffc & bo2_mc;
1872 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1873 						pm4[i++] = sdma_write_length;
1874 					}
1875 				}
1876 
1877 				amdgpu_test_exec_cs_helper(context_handle,
1878 							   ip_type, ring_id,
1879 							   i, pm4,
1880 							   2, resources,
1881 							   ib_info, ibs_request);
1882 
1883 				/* verify if SDMA test result meets with expected */
1884 				i = 0;
1885 				while(i < sdma_write_length) {
1886 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1887 				}
1888 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1889 							     sdma_write_length);
1890 				CU_ASSERT_EQUAL(r, 0);
1891 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1892 							     sdma_write_length);
1893 				CU_ASSERT_EQUAL(r, 0);
1894 				loop2++;
1895 			}
1896 			loop1++;
1897 		}
1898 	}
1899 	/* clean resources */
1900 	free(resources);
1901 	free(ibs_request);
1902 	free(ib_info);
1903 	free(pm4);
1904 
1905 	/* end of test */
1906 	r = amdgpu_cs_ctx_free(context_handle);
1907 	CU_ASSERT_EQUAL(r, 0);
1908 }
1909 
/* Run the copy-linear submission helper with ip_type AMDGPU_HW_IP_DMA. */
static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}
1914 
/*
 * SDMA command submission test entry point: runs the write-linear,
 * constant-fill and copy-linear SDMA tests, in that order.
 */
static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}
1921 
amdgpu_command_submission_multi_fence_wait_all(bool wait_all)1922 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1923 {
1924 	amdgpu_context_handle context_handle;
1925 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1926 	void *ib_result_cpu, *ib_result_ce_cpu;
1927 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1928 	struct amdgpu_cs_request ibs_request[2] = {0};
1929 	struct amdgpu_cs_ib_info ib_info[2];
1930 	struct amdgpu_cs_fence fence_status[2] = {0};
1931 	uint32_t *ptr;
1932 	uint32_t expired;
1933 	amdgpu_bo_list_handle bo_list;
1934 	amdgpu_va_handle va_handle, va_handle_ce;
1935 	int r;
1936 	int i = 0, ib_cs_num = 2;
1937 
1938 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1939 	CU_ASSERT_EQUAL(r, 0);
1940 
1941 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1942 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1943 				    &ib_result_handle, &ib_result_cpu,
1944 				    &ib_result_mc_address, &va_handle);
1945 	CU_ASSERT_EQUAL(r, 0);
1946 
1947 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1948 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1949 				    &ib_result_ce_handle, &ib_result_ce_cpu,
1950 				    &ib_result_ce_mc_address, &va_handle_ce);
1951 	CU_ASSERT_EQUAL(r, 0);
1952 
1953 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1954 			       ib_result_ce_handle, &bo_list);
1955 	CU_ASSERT_EQUAL(r, 0);
1956 
1957 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1958 
1959 	/* IT_SET_CE_DE_COUNTERS */
1960 	ptr = ib_result_ce_cpu;
1961 	if (family_id != AMDGPU_FAMILY_SI) {
1962 		ptr[i++] = 0xc0008900;
1963 		ptr[i++] = 0;
1964 	}
1965 	ptr[i++] = 0xc0008400;
1966 	ptr[i++] = 1;
1967 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1968 	ib_info[0].size = i;
1969 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1970 
1971 	/* IT_WAIT_ON_CE_COUNTER */
1972 	ptr = ib_result_cpu;
1973 	ptr[0] = 0xc0008600;
1974 	ptr[1] = 0x00000001;
1975 	ib_info[1].ib_mc_address = ib_result_mc_address;
1976 	ib_info[1].size = 2;
1977 
1978 	for (i = 0; i < ib_cs_num; i++) {
1979 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1980 		ibs_request[i].number_of_ibs = 2;
1981 		ibs_request[i].ibs = ib_info;
1982 		ibs_request[i].resources = bo_list;
1983 		ibs_request[i].fence_info.handle = NULL;
1984 	}
1985 
1986 	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
1987 
1988 	CU_ASSERT_EQUAL(r, 0);
1989 
1990 	for (i = 0; i < ib_cs_num; i++) {
1991 		fence_status[i].context = context_handle;
1992 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1993 		fence_status[i].fence = ibs_request[i].seq_no;
1994 	}
1995 
1996 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1997 				AMDGPU_TIMEOUT_INFINITE,
1998 				&expired, NULL);
1999 	CU_ASSERT_EQUAL(r, 0);
2000 
2001 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2002 				     ib_result_mc_address, 4096);
2003 	CU_ASSERT_EQUAL(r, 0);
2004 
2005 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2006 				     ib_result_ce_mc_address, 4096);
2007 	CU_ASSERT_EQUAL(r, 0);
2008 
2009 	r = amdgpu_bo_list_destroy(bo_list);
2010 	CU_ASSERT_EQUAL(r, 0);
2011 
2012 	r = amdgpu_cs_ctx_free(context_handle);
2013 	CU_ASSERT_EQUAL(r, 0);
2014 }
2015 
/*
 * Multi-fence test entry point: runs the multi-fence wait test twice,
 * first with wait_all = true, then with wait_all = false.
 */
static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}
2021 
amdgpu_userptr_test(void)2022 static void amdgpu_userptr_test(void)
2023 {
2024 	int i, r, j;
2025 	uint32_t *pm4 = NULL;
2026 	uint64_t bo_mc;
2027 	void *ptr = NULL;
2028 	int pm4_dw = 256;
2029 	int sdma_write_length = 4;
2030 	amdgpu_bo_handle handle;
2031 	amdgpu_context_handle context_handle;
2032 	struct amdgpu_cs_ib_info *ib_info;
2033 	struct amdgpu_cs_request *ibs_request;
2034 	amdgpu_bo_handle buf_handle;
2035 	amdgpu_va_handle va_handle;
2036 
2037 	pm4 = calloc(pm4_dw, sizeof(*pm4));
2038 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2039 
2040 	ib_info = calloc(1, sizeof(*ib_info));
2041 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2042 
2043 	ibs_request = calloc(1, sizeof(*ibs_request));
2044 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2045 
2046 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2047 	CU_ASSERT_EQUAL(r, 0);
2048 
2049 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2050 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2051 	memset(ptr, 0, BUFFER_SIZE);
2052 
2053 	r = amdgpu_create_bo_from_user_mem(device_handle,
2054 					   ptr, BUFFER_SIZE, &buf_handle);
2055 	CU_ASSERT_EQUAL(r, 0);
2056 
2057 	r = amdgpu_va_range_alloc(device_handle,
2058 				  amdgpu_gpu_va_range_general,
2059 				  BUFFER_SIZE, 1, 0, &bo_mc,
2060 				  &va_handle, 0);
2061 	CU_ASSERT_EQUAL(r, 0);
2062 
2063 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2064 	CU_ASSERT_EQUAL(r, 0);
2065 
2066 	handle = buf_handle;
2067 
2068 	j = i = 0;
2069 
2070 	if (family_id == AMDGPU_FAMILY_SI)
2071 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2072 				sdma_write_length);
2073 	else
2074 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2075 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2076 	pm4[i++] = 0xffffffff & bo_mc;
2077 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2078 	if (family_id >= AMDGPU_FAMILY_AI)
2079 		pm4[i++] = sdma_write_length - 1;
2080 	else if (family_id != AMDGPU_FAMILY_SI)
2081 		pm4[i++] = sdma_write_length;
2082 
2083 	while (j++ < sdma_write_length)
2084 		pm4[i++] = 0xdeadbeaf;
2085 
2086 	if (!fork()) {
2087 		pm4[0] = 0x0;
2088 		exit(0);
2089 	}
2090 
2091 	amdgpu_test_exec_cs_helper(context_handle,
2092 				   AMDGPU_HW_IP_DMA, 0,
2093 				   i, pm4,
2094 				   1, &handle,
2095 				   ib_info, ibs_request);
2096 	i = 0;
2097 	while (i < sdma_write_length) {
2098 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2099 	}
2100 	free(ibs_request);
2101 	free(ib_info);
2102 	free(pm4);
2103 
2104 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2105 	CU_ASSERT_EQUAL(r, 0);
2106 	r = amdgpu_va_range_free(va_handle);
2107 	CU_ASSERT_EQUAL(r, 0);
2108 	r = amdgpu_bo_free(buf_handle);
2109 	CU_ASSERT_EQUAL(r, 0);
2110 	free(ptr);
2111 
2112 	r = amdgpu_cs_ctx_free(context_handle);
2113 	CU_ASSERT_EQUAL(r, 0);
2114 
2115 	wait(NULL);
2116 }
2117 
amdgpu_sync_dependency_test(void)2118 static void amdgpu_sync_dependency_test(void)
2119 {
2120 	amdgpu_context_handle context_handle[2];
2121 	amdgpu_bo_handle ib_result_handle;
2122 	void *ib_result_cpu;
2123 	uint64_t ib_result_mc_address;
2124 	struct amdgpu_cs_request ibs_request;
2125 	struct amdgpu_cs_ib_info ib_info;
2126 	struct amdgpu_cs_fence fence_status;
2127 	uint32_t expired;
2128 	int i, j, r;
2129 	amdgpu_bo_list_handle bo_list;
2130 	amdgpu_va_handle va_handle;
2131 	static uint32_t *ptr;
2132 	uint64_t seq_no;
2133 
2134 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2135 	CU_ASSERT_EQUAL(r, 0);
2136 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2137 	CU_ASSERT_EQUAL(r, 0);
2138 
2139 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2140 			AMDGPU_GEM_DOMAIN_GTT, 0,
2141 						    &ib_result_handle, &ib_result_cpu,
2142 						    &ib_result_mc_address, &va_handle);
2143 	CU_ASSERT_EQUAL(r, 0);
2144 
2145 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2146 			       &bo_list);
2147 	CU_ASSERT_EQUAL(r, 0);
2148 
2149 	ptr = ib_result_cpu;
2150 	i = 0;
2151 
2152 	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2153 
2154 	/* Dispatch minimal init config and verify it's executed */
2155 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2156 	ptr[i++] = 0x80000000;
2157 	ptr[i++] = 0x80000000;
2158 
2159 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2160 	ptr[i++] = 0x80000000;
2161 
2162 
2163 	/* Program compute regs */
2164 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2165 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2166 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2167 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2168 
2169 
2170 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2171 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2172 	/*
2173 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2174 	                                      SGPRS = 1
2175 	                                      PRIORITY = 0
2176 	                                      FLOAT_MODE = 192 (0xc0)
2177 	                                      PRIV = 0
2178 	                                      DX10_CLAMP = 1
2179 	                                      DEBUG_MODE = 0
2180 	                                      IEEE_MODE = 0
2181 	                                      BULKY = 0
2182 	                                      CDBG_USER = 0
2183 	 *
2184 	 */
2185 	ptr[i++] = 0x002c0040;
2186 
2187 
2188 	/*
2189 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2190 	                                      USER_SGPR = 8
2191 	                                      TRAP_PRESENT = 0
2192 	                                      TGID_X_EN = 0
2193 	                                      TGID_Y_EN = 0
2194 	                                      TGID_Z_EN = 0
2195 	                                      TG_SIZE_EN = 0
2196 	                                      TIDIG_COMP_CNT = 0
2197 	                                      EXCP_EN_MSB = 0
2198 	                                      LDS_SIZE = 0
2199 	                                      EXCP_EN = 0
2200 	 *
2201 	 */
2202 	ptr[i++] = 0x00000010;
2203 
2204 
2205 /*
2206  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2207                                          WAVESIZE = 0
2208  *
2209  */
2210 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2211 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2212 	ptr[i++] = 0x00000100;
2213 
2214 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2215 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2216 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2217 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2218 
2219 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2220 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2221 	ptr[i++] = 0;
2222 
2223 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2224 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2225 	ptr[i++] = 1;
2226 	ptr[i++] = 1;
2227 	ptr[i++] = 1;
2228 
2229 
2230 	/* Dispatch */
2231 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2232 	ptr[i++] = 1;
2233 	ptr[i++] = 1;
2234 	ptr[i++] = 1;
2235 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2236 
2237 
2238 	while (i & 7)
2239 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2240 
2241 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2242 	ib_info.ib_mc_address = ib_result_mc_address;
2243 	ib_info.size = i;
2244 
2245 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2246 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2247 	ibs_request.ring = 0;
2248 	ibs_request.number_of_ibs = 1;
2249 	ibs_request.ibs = &ib_info;
2250 	ibs_request.resources = bo_list;
2251 	ibs_request.fence_info.handle = NULL;
2252 
2253 	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2254 	CU_ASSERT_EQUAL(r, 0);
2255 	seq_no = ibs_request.seq_no;
2256 
2257 
2258 
2259 	/* Prepare second command with dependency on the first */
2260 	j = i;
2261 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2262 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2263 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2264 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2265 	ptr[i++] = 99;
2266 
2267 	while (i & 7)
2268 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2269 
2270 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2271 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2272 	ib_info.size = i - j;
2273 
2274 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2275 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2276 	ibs_request.ring = 0;
2277 	ibs_request.number_of_ibs = 1;
2278 	ibs_request.ibs = &ib_info;
2279 	ibs_request.resources = bo_list;
2280 	ibs_request.fence_info.handle = NULL;
2281 
2282 	ibs_request.number_of_dependencies = 1;
2283 
2284 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2285 	ibs_request.dependencies[0].context = context_handle[1];
2286 	ibs_request.dependencies[0].ip_instance = 0;
2287 	ibs_request.dependencies[0].ring = 0;
2288 	ibs_request.dependencies[0].fence = seq_no;
2289 
2290 
2291 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2292 	CU_ASSERT_EQUAL(r, 0);
2293 
2294 
2295 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2296 	fence_status.context = context_handle[0];
2297 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2298 	fence_status.ip_instance = 0;
2299 	fence_status.ring = 0;
2300 	fence_status.fence = ibs_request.seq_no;
2301 
2302 	r = amdgpu_cs_query_fence_status(&fence_status,
2303 		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2304 	CU_ASSERT_EQUAL(r, 0);
2305 
2306 	/* Expect the second command to wait for shader to complete */
2307 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2308 
2309 	r = amdgpu_bo_list_destroy(bo_list);
2310 	CU_ASSERT_EQUAL(r, 0);
2311 
2312 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2313 				     ib_result_mc_address, 4096);
2314 	CU_ASSERT_EQUAL(r, 0);
2315 
2316 	r = amdgpu_cs_ctx_free(context_handle[0]);
2317 	CU_ASSERT_EQUAL(r, 0);
2318 	r = amdgpu_cs_ctx_free(context_handle[1]);
2319 	CU_ASSERT_EQUAL(r, 0);
2320 
2321 	free(ibs_request.dependencies);
2322 }
2323 
amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t * ptr,int family)2324 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2325 {
2326 	struct amdgpu_test_shader *shader;
2327 	int i, loop = 0x10000;
2328 
2329 	switch (family) {
2330 		case AMDGPU_FAMILY_AI:
2331 			shader = &memcpy_cs_hang_slow_ai;
2332 			break;
2333 		case AMDGPU_FAMILY_RV:
2334 			shader = &memcpy_cs_hang_slow_rv;
2335 			break;
2336 		default:
2337 			return -1;
2338 			break;
2339 	}
2340 
2341 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2342 
2343 	for (i = 0; i < loop; i++)
2344 		memcpy(ptr + shader->header_length + shader->body_length * i,
2345 			shader->shader + shader->header_length,
2346 			shader->body_length * sizeof(uint32_t));
2347 
2348 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2349 		shader->shader + shader->header_length + shader->body_length,
2350 		shader->foot_length * sizeof(uint32_t));
2351 
2352 	return 0;
2353 }
2354 
amdgpu_dispatch_load_cs_shader(uint8_t * ptr,int cs_type)2355 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2356 					   int cs_type)
2357 {
2358 	uint32_t shader_size;
2359 	const uint32_t *shader;
2360 
2361 	switch (cs_type) {
2362 		case CS_BUFFERCLEAR:
2363 			shader = bufferclear_cs_shader_gfx9;
2364 			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2365 			break;
2366 		case CS_BUFFERCOPY:
2367 			shader = buffercopy_cs_shader_gfx9;
2368 			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2369 			break;
2370 		case CS_HANG:
2371 			shader = memcpy_ps_hang;
2372 			shader_size = sizeof(memcpy_ps_hang);
2373 			break;
2374 		default:
2375 			return -1;
2376 			break;
2377 	}
2378 
2379 	memcpy(ptr, shader, shader_size);
2380 	return 0;
2381 }
2382 
amdgpu_dispatch_init(uint32_t * ptr,uint32_t ip_type)2383 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2384 {
2385 	int i = 0;
2386 
2387 	/* Write context control and load shadowing register if necessary */
2388 	if (ip_type == AMDGPU_HW_IP_GFX) {
2389 		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2390 		ptr[i++] = 0x80000000;
2391 		ptr[i++] = 0x80000000;
2392 	}
2393 
2394 	/* Issue commands to set default compute state. */
2395 	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2396 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2397 	ptr[i++] = 0x204;
2398 	i += 3;
2399 
2400 	/* clear mmCOMPUTE_TMPRING_SIZE */
2401 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2402 	ptr[i++] = 0x218;
2403 	ptr[i++] = 0;
2404 
2405 	return i;
2406 }
2407 
amdgpu_dispatch_write_cumask(uint32_t * ptr)2408 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2409 {
2410 	int i = 0;
2411 
2412 	/*  Issue commands to set cu mask used in current dispatch */
2413 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2414 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2415 	ptr[i++] = 0x216;
2416 	ptr[i++] = 0xffffffff;
2417 	ptr[i++] = 0xffffffff;
2418 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2419 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2420 	ptr[i++] = 0x219;
2421 	ptr[i++] = 0xffffffff;
2422 	ptr[i++] = 0xffffffff;
2423 
2424 	return i;
2425 }
2426 
amdgpu_dispatch_write2hw(uint32_t * ptr,uint64_t shader_addr)2427 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2428 {
2429 	int i, j;
2430 
2431 	i = 0;
2432 
2433 	/* Writes shader state to HW */
2434 	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2435 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2436 	ptr[i++] = 0x20c;
2437 	ptr[i++] = (shader_addr >> 8);
2438 	ptr[i++] = (shader_addr >> 40);
2439 	/* write sh regs*/
2440 	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2441 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2442 		/* - Gfx9ShRegBase */
2443 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2444 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2445 	}
2446 
2447 	return i;
2448 }
2449 
/*
 * Memset-by-compute-dispatch test.
 *
 * Allocates a command buffer (GTT), a shader buffer (VRAM) and a destination
 * buffer (VRAM), loads the CS_BUFFERCLEAR shader, builds a dispatch command
 * stream, submits it on the given IP type / ring, waits for its fence and
 * asserts that every byte of the destination buffer equals 0x22.
 *
 * @device_handle: device to run on (shadows the file-level device_handle)
 * @ip_type:       hardware IP type used for the submission and fence query
 * @ring:          ring index used for the submission and fence query
 */
static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
					 uint32_t ip_type,
					 uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_shader_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer, zeroed before the command stream is written into it */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* shader buffer, zeroed and then filled with the buffer-clear shader */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
	CU_ASSERT_EQUAL(r, 0);

	/* destination buffer; it is not initialized by the CPU */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* each helper returns the number of dwords it wrote */
	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the UAV constant data to the SGPRs. */
	/*
	 * Four dwords at offset 0x240 built from the destination address;
	 * NOTE(review): the meaning of 0x100000, 0x400 and 0x74fac is not
	 * visible here -- presumably buffer descriptor fields, confirm.
	 */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400;
	ptr_cmd[i++] = 0x74fac;

	/* Sets a range of pixel shader constants */
	/* (presumably the fill pattern: the readback below expects 0x22 bytes) */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;
	ptr_cmd[i++] = 0x22222222;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad the command stream to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* all three buffers go into the BO list of the submission */
	resources[0] = bo_dst;
	resources[1] = bo_shader;
	resources[2] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* the BO list is destroyed right after submission, before the wait */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for IB accomplished */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify that every destination byte now holds the expected 0x22 */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2591 
amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,uint32_t ip_type,uint32_t ring,int hang)2592 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2593 					uint32_t ip_type,
2594 					uint32_t ring,
2595 					int hang)
2596 {
2597 	amdgpu_context_handle context_handle;
2598 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2599 	volatile unsigned char *ptr_dst;
2600 	void *ptr_shader;
2601 	unsigned char *ptr_src;
2602 	uint32_t *ptr_cmd;
2603 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2604 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2605 	int i, r;
2606 	int bo_dst_size = 16384;
2607 	int bo_shader_size = 4096;
2608 	int bo_cmd_size = 4096;
2609 	struct amdgpu_cs_request ibs_request = {0};
2610 	struct amdgpu_cs_ib_info ib_info= {0};
2611 	uint32_t expired, hang_state, hangs;
2612 	enum cs_type cs_type;
2613 	amdgpu_bo_list_handle bo_list;
2614 	struct amdgpu_cs_fence fence_status = {0};
2615 
2616 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2617 	CU_ASSERT_EQUAL(r, 0);
2618 
2619 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2620 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2621 				    &bo_cmd, (void **)&ptr_cmd,
2622 				    &mc_address_cmd, &va_cmd);
2623 	CU_ASSERT_EQUAL(r, 0);
2624 	memset(ptr_cmd, 0, bo_cmd_size);
2625 
2626 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2627 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2628 					&bo_shader, &ptr_shader,
2629 					&mc_address_shader, &va_shader);
2630 	CU_ASSERT_EQUAL(r, 0);
2631 	memset(ptr_shader, 0, bo_shader_size);
2632 
2633 	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2634 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2635 	CU_ASSERT_EQUAL(r, 0);
2636 
2637 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2638 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2639 					&bo_src, (void **)&ptr_src,
2640 					&mc_address_src, &va_src);
2641 	CU_ASSERT_EQUAL(r, 0);
2642 
2643 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2644 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2645 					&bo_dst, (void **)&ptr_dst,
2646 					&mc_address_dst, &va_dst);
2647 	CU_ASSERT_EQUAL(r, 0);
2648 
2649 	memset(ptr_src, 0x55, bo_dst_size);
2650 
2651 	i = 0;
2652 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2653 
2654 	/*  Issue commands to set cu mask used in current dispatch */
2655 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2656 
2657 	/* Writes shader state to HW */
2658 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2659 
2660 	/* Write constant data */
2661 	/* Writes the texture resource constants data to the SGPRs */
2662 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2663 	ptr_cmd[i++] = 0x240;
2664 	ptr_cmd[i++] = mc_address_src;
2665 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2666 	ptr_cmd[i++] = 0x400;
2667 	ptr_cmd[i++] = 0x74fac;
2668 
2669 	/* Writes the UAV constant data to the SGPRs. */
2670 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2671 	ptr_cmd[i++] = 0x244;
2672 	ptr_cmd[i++] = mc_address_dst;
2673 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2674 	ptr_cmd[i++] = 0x400;
2675 	ptr_cmd[i++] = 0x74fac;
2676 
2677 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2678 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2679 	ptr_cmd[i++] = 0x215;
2680 	ptr_cmd[i++] = 0;
2681 
2682 	/* dispatch direct command */
2683 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2684 	ptr_cmd[i++] = 0x10;
2685 	ptr_cmd[i++] = 1;
2686 	ptr_cmd[i++] = 1;
2687 	ptr_cmd[i++] = 1;
2688 
2689 	while (i & 7)
2690 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2691 
2692 	resources[0] = bo_shader;
2693 	resources[1] = bo_src;
2694 	resources[2] = bo_dst;
2695 	resources[3] = bo_cmd;
2696 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2697 	CU_ASSERT_EQUAL(r, 0);
2698 
2699 	ib_info.ib_mc_address = mc_address_cmd;
2700 	ib_info.size = i;
2701 	ibs_request.ip_type = ip_type;
2702 	ibs_request.ring = ring;
2703 	ibs_request.resources = bo_list;
2704 	ibs_request.number_of_ibs = 1;
2705 	ibs_request.ibs = &ib_info;
2706 	ibs_request.fence_info.handle = NULL;
2707 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2708 	CU_ASSERT_EQUAL(r, 0);
2709 
2710 	fence_status.ip_type = ip_type;
2711 	fence_status.ip_instance = 0;
2712 	fence_status.ring = ring;
2713 	fence_status.context = context_handle;
2714 	fence_status.fence = ibs_request.seq_no;
2715 
2716 	/* wait for IB accomplished */
2717 	r = amdgpu_cs_query_fence_status(&fence_status,
2718 					 AMDGPU_TIMEOUT_INFINITE,
2719 					 0, &expired);
2720 
2721 	if (!hang) {
2722 		CU_ASSERT_EQUAL(r, 0);
2723 		CU_ASSERT_EQUAL(expired, true);
2724 
2725 		/* verify if memcpy test result meets with expected */
2726 		i = 0;
2727 		while(i < bo_dst_size) {
2728 			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2729 			i++;
2730 		}
2731 	} else {
2732 		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2733 		CU_ASSERT_EQUAL(r, 0);
2734 		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2735 	}
2736 
2737 	r = amdgpu_bo_list_destroy(bo_list);
2738 	CU_ASSERT_EQUAL(r, 0);
2739 
2740 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2741 	CU_ASSERT_EQUAL(r, 0);
2742 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2743 	CU_ASSERT_EQUAL(r, 0);
2744 
2745 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2746 	CU_ASSERT_EQUAL(r, 0);
2747 
2748 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2749 	CU_ASSERT_EQUAL(r, 0);
2750 
2751 	r = amdgpu_cs_ctx_free(context_handle);
2752 	CU_ASSERT_EQUAL(r, 0);
2753 }
2754 
amdgpu_compute_dispatch_test(void)2755 static void amdgpu_compute_dispatch_test(void)
2756 {
2757 	int r;
2758 	struct drm_amdgpu_info_hw_ip info;
2759 	uint32_t ring_id;
2760 
2761 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2762 	CU_ASSERT_EQUAL(r, 0);
2763 	if (!info.available_rings)
2764 		printf("SKIP ... as there's no compute ring\n");
2765 
2766 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2767 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2768 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2769 	}
2770 }
2771 
amdgpu_gfx_dispatch_test(void)2772 static void amdgpu_gfx_dispatch_test(void)
2773 {
2774 	int r;
2775 	struct drm_amdgpu_info_hw_ip info;
2776 	uint32_t ring_id;
2777 
2778 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2779 	CU_ASSERT_EQUAL(r, 0);
2780 	if (!info.available_rings)
2781 		printf("SKIP ... as there's no graphics ring\n");
2782 
2783 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2784 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2785 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2786 	}
2787 }
2788 
/*
 * For each available ring of ip_type, run a good memcpy dispatch, then a
 * hanging one, then a good one again, so the final run exercises the ring
 * after the hang.
 *
 * device_handle: device to test.
 * ip_type:       hardware IP block whose rings are enumerated.
 */
void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	/* Zeroed so a failed query can never expose indeterminate ring bits. */
	struct drm_amdgpu_info_hw_ip info = {0};
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	if (!info.available_rings) {
		/* ip_type is unsigned, so print it with %u */
		printf("SKIP ... as there's no ring for ip %u\n", ip_type);
		return;
	}

	/*
	 * Unsigned shift plus an explicit bound: "1 << 31" on a signed int and
	 * any shift by 32 are undefined, which the ring mask could reach.
	 */
	for (ring_id = 0;
	     ring_id < 32 && ((1u << ring_id) & info.available_rings);
	     ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2806 
/*
 * Submit one compute dispatch that uses the "hang slow" shader on the given
 * ring of ip_type, wait for its fence, and check that the context then
 * reports AMDGPU_CTX_UNKNOWN_RESET.
 *
 * The function allocates a 4 KiB command buffer in GTT plus, in VRAM, a
 * 4 MiB shader buffer and 64 MiB source and destination buffers, and frees
 * all of them (and the context) before returning.  The destination contents
 * are never verified here; only the reset state is checked.
 *
 * Note: CU_ASSERT_EQUAL only records a failure, it does not stop the test,
 * so later statements still run after a failed allocation or submit.
 */
static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
						  uint32_t ip_type, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	void *ptr_shader;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
	int i, r;
	int bo_dst_size = 0x4000000;	/* 64 MiB, used for both src and dst */
	int bo_shader_size = 0x400000;	/* 4 MiB */
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	struct amdgpu_gpu_info gpu_info = {0};
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* the GPU family selects which slow-hang shader gets loaded below */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer, zeroed so skipped dwords read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader, &ptr_shader,
					&mc_address_shader, &va_shader);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader, 0, bo_shader_size);

	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the source with a known byte pattern */
	memset(ptr_src, 0x55, bo_dst_size);

	/* i counts dwords written into the command buffer */
	i = 0;
	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);

	/*  Issue commands to set cu mask used in current dispatch */
	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);

	/* Writes shader state to HW */
	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);

	/* Write constant data */
	/* Writes the texture resource constants data to the SGPRs */
	/*
	 * NOTE(review): the third dword is 0x400000 here, where the regular
	 * memcpy dispatch test in this file writes 0x400 — presumably a
	 * size/record-count field scaled up for the larger buffers; confirm.
	 */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x240;
	ptr_cmd[i++] = mc_address_src;
	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* Writes the UAV constant data to the SGPRs. */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x244;
	ptr_cmd[i++] = mc_address_dst;
	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
	ptr_cmd[i++] = 0x400000;
	ptr_cmd[i++] = 0x74fac;

	/* clear mmCOMPUTE_RESOURCE_LIMITS */
	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
	ptr_cmd[i++] = 0x215;
	ptr_cmd[i++] = 0;

	/* dispatch direct command */
	/* first dimension is 0x10000 here; the regular memcpy test uses 0x10 */
	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
	ptr_cmd[i++] = 0x10000;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;
	ptr_cmd[i++] = 1;

	/* pad the IB with NOPs up to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB references must be in the BO list */
	resources[0] = bo_shader;
	resources[1] = bo_src;
	resources[2] = bo_dst;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* IB size in dwords */
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to finish */
	/*
	 * The result of this wait is not asserted: r is overwritten by the
	 * reset-state query right below, and expired is never read.
	 */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	/* the context must report that a reset happened */
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	/* tear everything down again */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
2957 
/*
 * For each available ring of ip_type, run a good memcpy dispatch, then the
 * slow-hang dispatch, then a good one again, so the final run exercises the
 * ring after the hang.
 *
 * device_handle: device to test.
 * ip_type:       hardware IP block whose rings are enumerated.
 */
void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	/* Zeroed so a failed query can never expose indeterminate ring bits. */
	struct drm_amdgpu_info_hw_ip info = {0};
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return;

	if (!info.available_rings) {
		/* ip_type is unsigned, so print it with %u */
		printf("SKIP ... as there's no ring for ip %u\n", ip_type);
		return;
	}

	/*
	 * Unsigned shift plus an explicit bound: "1 << 31" on a signed int and
	 * any shift by 32 are undefined, which the ring mask could reach.
	 */
	for (ring_id = 0;
	     ring_id < 32 && ((1u << ring_id) & info.available_rings);
	     ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
	}
}
2975 
amdgpu_draw_load_ps_shader_hang_slow(uint32_t * ptr,int family)2976 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2977 {
2978 	struct amdgpu_test_shader *shader;
2979 	int i, loop = 0x40000;
2980 
2981 	switch (family) {
2982 		case AMDGPU_FAMILY_AI:
2983 		case AMDGPU_FAMILY_RV:
2984 			shader = &memcpy_ps_hang_slow_ai;
2985 			break;
2986 		default:
2987 			return -1;
2988 			break;
2989 	}
2990 
2991 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2992 
2993 	for (i = 0; i < loop; i++)
2994 		memcpy(ptr + shader->header_length + shader->body_length * i,
2995 			shader->shader + shader->header_length,
2996 			shader->body_length * sizeof(uint32_t));
2997 
2998 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2999 		shader->shader + shader->header_length + shader->body_length,
3000 		shader->foot_length * sizeof(uint32_t));
3001 
3002 	return 0;
3003 }
3004 
amdgpu_draw_load_ps_shader(uint8_t * ptr,int ps_type)3005 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
3006 {
3007 	int i;
3008 	uint32_t shader_offset= 256;
3009 	uint32_t mem_offset, patch_code_offset;
3010 	uint32_t shader_size, patchinfo_code_size;
3011 	const uint32_t *shader;
3012 	const uint32_t *patchinfo_code;
3013 	const uint32_t *patchcode_offset;
3014 
3015 	switch (ps_type) {
3016 		case PS_CONST:
3017 			shader = ps_const_shader_gfx9;
3018 			shader_size = sizeof(ps_const_shader_gfx9);
3019 			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3020 			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3021 			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3022 			break;
3023 		case PS_TEX:
3024 			shader = ps_tex_shader_gfx9;
3025 			shader_size = sizeof(ps_tex_shader_gfx9);
3026 			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3027 			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3028 			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3029 			break;
3030 		case PS_HANG:
3031 			shader = memcpy_ps_hang;
3032 			shader_size = sizeof(memcpy_ps_hang);
3033 
3034 			memcpy(ptr, shader, shader_size);
3035 			return 0;
3036 		default:
3037 			return -1;
3038 			break;
3039 	}
3040 
3041 	/* write main shader program */
3042 	for (i = 0 ; i < 10; i++) {
3043 		mem_offset = i * shader_offset;
3044 		memcpy(ptr + mem_offset, shader, shader_size);
3045 	}
3046 
3047 	/* overwrite patch codes */
3048 	for (i = 0 ; i < 10; i++) {
3049 		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3050 		patch_code_offset = i * patchinfo_code_size;
3051 		memcpy(ptr + mem_offset,
3052 			patchinfo_code + patch_code_offset,
3053 			patchinfo_code_size * sizeof(uint32_t));
3054 	}
3055 
3056 	return 0;
3057 }
3058 
3059 /* load RectPosTexFast_VS */
amdgpu_draw_load_vs_shader(uint8_t * ptr)3060 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
3061 {
3062 	const uint32_t *shader;
3063 	uint32_t shader_size;
3064 
3065 	shader = vs_RectPosTexFast_shader_gfx9;
3066 	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3067 
3068 	memcpy(ptr, shader, shader_size);
3069 
3070 	return 0;
3071 }
3072 
amdgpu_draw_init(uint32_t * ptr)3073 static int amdgpu_draw_init(uint32_t *ptr)
3074 {
3075 	int i = 0;
3076 	const uint32_t *preamblecache_ptr;
3077 	uint32_t preamblecache_size;
3078 
3079 	/* Write context control and load shadowing register if necessary */
3080 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3081 	ptr[i++] = 0x80000000;
3082 	ptr[i++] = 0x80000000;
3083 
3084 	preamblecache_ptr = preamblecache_gfx9;
3085 	preamblecache_size = sizeof(preamblecache_gfx9);
3086 
3087 	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3088 	return i + preamblecache_size/sizeof(uint32_t);
3089 }
3090 
amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t * ptr,uint64_t dst_addr,int hang_slow)3091 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3092 							 uint64_t dst_addr,
3093 							 int hang_slow)
3094 {
3095 	int i = 0;
3096 
3097 	/* setup color buffer */
3098 	/* offset   reg
3099 	   0xA318   CB_COLOR0_BASE
3100 	   0xA319   CB_COLOR0_BASE_EXT
3101 	   0xA31A   CB_COLOR0_ATTRIB2
3102 	   0xA31B   CB_COLOR0_VIEW
3103 	   0xA31C   CB_COLOR0_INFO
3104 	   0xA31D   CB_COLOR0_ATTRIB
3105 	   0xA31E   CB_COLOR0_DCC_CONTROL
3106 	   0xA31F   CB_COLOR0_CMASK
3107 	   0xA320   CB_COLOR0_CMASK_BASE_EXT
3108 	   0xA321   CB_COLOR0_FMASK
3109 	   0xA322   CB_COLOR0_FMASK_BASE_EXT
3110 	   0xA323   CB_COLOR0_CLEAR_WORD0
3111 	   0xA324   CB_COLOR0_CLEAR_WORD1
3112 	   0xA325   CB_COLOR0_DCC_BASE
3113 	   0xA326   CB_COLOR0_DCC_BASE_EXT */
3114 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3115 	ptr[i++] = 0x318;
3116 	ptr[i++] = dst_addr >> 8;
3117 	ptr[i++] = dst_addr >> 40;
3118 	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
3119 	ptr[i++] = 0;
3120 	ptr[i++] = 0x50438;
3121 	ptr[i++] = 0x10140000;
3122 	i += 9;
3123 
3124 	/* mmCB_MRT0_EPITCH */
3125 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3126 	ptr[i++] = 0x1e8;
3127 	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
3128 
3129 	/* 0xA32B   CB_COLOR1_BASE */
3130 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3131 	ptr[i++] = 0x32b;
3132 	ptr[i++] = 0;
3133 
3134 	/* 0xA33A   CB_COLOR1_BASE */
3135 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3136 	ptr[i++] = 0x33a;
3137 	ptr[i++] = 0;
3138 
3139 	/* SPI_SHADER_COL_FORMAT */
3140 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3141 	ptr[i++] = 0x1c5;
3142 	ptr[i++] = 9;
3143 
3144 	/* Setup depth buffer */
3145 	/* mmDB_Z_INFO */
3146 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3147 	ptr[i++] = 0xe;
3148 	i += 2;
3149 
3150 	return i;
3151 }
3152 
amdgpu_draw_setup_and_write_drawblt_state(uint32_t * ptr,int hang_slow)3153 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
3154 {
3155 	int i = 0;
3156 	const uint32_t *cached_cmd_ptr;
3157 	uint32_t cached_cmd_size;
3158 
3159 	/* mmPA_SC_TILE_STEERING_OVERRIDE */
3160 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3161 	ptr[i++] = 0xd7;
3162 	ptr[i++] = 0;
3163 
3164 	ptr[i++] = 0xffff1000;
3165 	ptr[i++] = 0xc0021000;
3166 
3167 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3168 	ptr[i++] = 0xd7;
3169 	ptr[i++] = 1;
3170 
3171 	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3172 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3173 	ptr[i++] = 0x2fe;
3174 	i += 16;
3175 
3176 	/* mmPA_SC_CENTROID_PRIORITY_0 */
3177 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3178 	ptr[i++] = 0x2f5;
3179 	i += 2;
3180 
3181 	cached_cmd_ptr = cached_cmd_gfx9;
3182 	cached_cmd_size = sizeof(cached_cmd_gfx9);
3183 
3184 	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3185 	if (hang_slow)
3186 		*(ptr + i + 12) = 0x8000800;
3187 	i += cached_cmd_size/sizeof(uint32_t);
3188 
3189 	return i;
3190 }
3191 
amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr,int hang_slow)3192 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3193 						  int ps_type,
3194 						  uint64_t shader_addr,
3195 						  int hang_slow)
3196 {
3197 	int i = 0;
3198 
3199 	/* mmPA_CL_VS_OUT_CNTL */
3200 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3201 	ptr[i++] = 0x207;
3202 	ptr[i++] = 0;
3203 
3204 	/* mmSPI_SHADER_PGM_RSRC3_VS */
3205 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3206 	ptr[i++] = 0x46;
3207 	ptr[i++] = 0xffff;
3208 
3209 	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3210 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3211 	ptr[i++] = 0x48;
3212 	ptr[i++] = shader_addr >> 8;
3213 	ptr[i++] = shader_addr >> 40;
3214 
3215 	/* mmSPI_SHADER_PGM_RSRC1_VS */
3216 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3217 	ptr[i++] = 0x4a;
3218 	ptr[i++] = 0xc0081;
3219 	/* mmSPI_SHADER_PGM_RSRC2_VS */
3220 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3221 	ptr[i++] = 0x4b;
3222 	ptr[i++] = 0x18;
3223 
3224 	/* mmSPI_VS_OUT_CONFIG */
3225 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3226 	ptr[i++] = 0x1b1;
3227 	ptr[i++] = 2;
3228 
3229 	/* mmSPI_SHADER_POS_FORMAT */
3230 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3231 	ptr[i++] = 0x1c3;
3232 	ptr[i++] = 4;
3233 
3234 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3235 	ptr[i++] = 0x4c;
3236 	i += 2;
3237 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3238 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3239 
3240 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3241 	ptr[i++] = 0x50;
3242 	i += 2;
3243 	if (ps_type == PS_CONST) {
3244 		i += 2;
3245 	} else if (ps_type == PS_TEX) {
3246 		ptr[i++] = 0x3f800000;
3247 		ptr[i++] = 0x3f800000;
3248 	}
3249 
3250 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3251 	ptr[i++] = 0x54;
3252 	i += 4;
3253 
3254 	return i;
3255 }
3256 
amdgpu_draw_ps_write2hw(uint32_t * ptr,int ps_type,uint64_t shader_addr)3257 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3258 				   int ps_type,
3259 				   uint64_t shader_addr)
3260 {
3261 	int i, j;
3262 	const uint32_t *sh_registers;
3263 	const uint32_t *context_registers;
3264 	uint32_t num_sh_reg, num_context_reg;
3265 
3266 	if (ps_type == PS_CONST) {
3267 		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3268 		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3269 		num_sh_reg = ps_num_sh_registers_gfx9;
3270 		num_context_reg = ps_num_context_registers_gfx9;
3271 	} else if (ps_type == PS_TEX) {
3272 		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3273 		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3274 		num_sh_reg = ps_num_sh_registers_gfx9;
3275 		num_context_reg = ps_num_context_registers_gfx9;
3276 	}
3277 
3278 	i = 0;
3279 
3280 	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3281 	   0x2c08   SPI_SHADER_PGM_LO_PS
3282 	   0x2c09   SPI_SHADER_PGM_HI_PS */
3283 	shader_addr += 256 * 9;
3284 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3285 	ptr[i++] = 0x7;
3286 	ptr[i++] = 0xffff;
3287 	ptr[i++] = shader_addr >> 8;
3288 	ptr[i++] = shader_addr >> 40;
3289 
3290 	for (j = 0; j < num_sh_reg; j++) {
3291 		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3292 		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3293 		ptr[i++] = sh_registers[j * 2 + 1];
3294 	}
3295 
3296 	for (j = 0; j < num_context_reg; j++) {
3297 		if (context_registers[j * 2] != 0xA1C5) {
3298 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3299 			ptr[i++] = context_registers[j * 2] - 0xa000;
3300 			ptr[i++] = context_registers[j * 2 + 1];
3301 		}
3302 
3303 		if (context_registers[j * 2] == 0xA1B4) {
3304 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3305 			ptr[i++] = 0x1b3;
3306 			ptr[i++] = 2;
3307 		}
3308 	}
3309 
3310 	return i;
3311 }
3312 
amdgpu_draw_draw(uint32_t * ptr)3313 static int amdgpu_draw_draw(uint32_t *ptr)
3314 {
3315 	int i = 0;
3316 
3317 	/* mmIA_MULTI_VGT_PARAM */
3318 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3319 	ptr[i++] = 0x40000258;
3320 	ptr[i++] = 0xd00ff;
3321 
3322 	/* mmVGT_PRIMITIVE_TYPE */
3323 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3324 	ptr[i++] = 0x10000242;
3325 	ptr[i++] = 0x11;
3326 
3327 	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3328 	ptr[i++] = 3;
3329 	ptr[i++] = 2;
3330 
3331 	return i;
3332 }
3333 
/*
 * Fill a 16 KiB VRAM buffer through a draw on the GFX ring ring_id, using
 * shaders the caller has already loaded, and check that every byte of the
 * buffer reads back as 0x33.
 *
 * device_handle:        device to test.
 * bo_shader_ps/_vs:     buffer objects holding the pixel / vertex shader.
 * mc_address_shader_*:  GPU addresses of those shader buffers.
 * ring_id:              GFX ring to submit on.
 *
 * The command buffer, destination buffer and context are created and
 * released here; the shader buffers stay owned by the caller.
 *
 * Note: CU_ASSERT_EQUAL only records a failure, it does not stop the test,
 * so later statements still run after a failed allocation or submit.
 */
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer, zeroed so dwords skipped by the helpers read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* i counts dwords written into the command buffer */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);

	/*
	 * Four SH registers from offset 0xc, all 0x33333333.
	 * NOTE(review): presumably the constant the PS_CONST shader writes
	 * out, matching the 0x33 bytes verified below — confirm.
	 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB with NOPs up to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB references must be in the BO list */
	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* IB size in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	/* the BO list is released right after the submit, before the wait */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to finish */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify that the memset result matches the expected pattern */
	i = 0;
	while(i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3446 
/*
 * Allocate one 4 KiB VRAM buffer each for the pixel and the vertex shader,
 * load the PS_CONST pixel shader and the vertex shader into them, run
 * amdgpu_memset_draw() on the given ring, and free both buffers again.
 */
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	const int shader_bo_size = 4096;
	amdgpu_bo_handle ps_bo, vs_bo;
	amdgpu_va_handle ps_va, vs_va;
	uint64_t ps_mc, vs_mc;
	void *ps_cpu;
	void *vs_cpu;
	int r;

	/* pixel shader buffer, cleared before use */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &ps_bo, &ps_cpu, &ps_mc, &ps_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(ps_cpu, 0, shader_bo_size);

	/* vertex shader buffer, cleared before use */
	r = amdgpu_bo_alloc_and_map(device_handle, shader_bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &vs_bo, &vs_cpu, &vs_mc, &vs_va);
	CU_ASSERT_EQUAL(r, 0);
	memset(vs_cpu, 0, shader_bo_size);

	r = amdgpu_draw_load_ps_shader(ps_cpu, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(vs_cpu);
	CU_ASSERT_EQUAL(r, 0);

	/* the draw and its verification happen in here */
	amdgpu_memset_draw(device_handle, ps_bo, vs_bo, ps_mc, vs_mc, ring);

	r = amdgpu_bo_unmap_and_free(ps_bo, ps_va, ps_mc, shader_bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(vs_bo, vs_va, vs_mc, shader_bo_size);
	CU_ASSERT_EQUAL(r, 0);
}
3487 
/*
 * Copy a 16 KiB VRAM buffer filled with 0x55 into a second 16 KiB VRAM
 * buffer through a draw on the given GFX ring, using shaders the caller has
 * already loaded.
 *
 * device_handle:        device to test.
 * bo_shader_ps/_vs:     buffer objects holding the pixel / vertex shader.
 * mc_address_shader_*:  GPU addresses of those shader buffers.
 * ring:                 GFX ring to submit on.
 * hang:                 zero: the fence must signal and dst must equal src
 *                       byte for byte.  Non-zero: the copy is not checked;
 *                       instead the context must report
 *                       AMDGPU_CTX_UNKNOWN_RESET.
 *
 * The command stream built here is the same for both values of hang.
 * NOTE(review): the hang itself presumably comes from the pixel shader the
 * caller loaded — confirm against the caller.
 *
 * The command, source and destination buffers and the context are created
 * and released here; the shader buffers stay owned by the caller.
 *
 * Note: CU_ASSERT_EQUAL only records a failure, it does not stop the test,
 * so later statements still run after a failed allocation or submit.
 */
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;	/* size of both src and dst */
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* command buffer, zeroed so dwords skipped below read as 0 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	/* fill the source with a known byte pattern */
	memset(ptr_src, 0x55, bo_size);

	/* i counts dwords written into the command buffer */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/*
	 * Eight SH registers from offset 0xc: five are written, the last
	 * three are skipped and stay zero.
	 * NOTE(review): this looks like the resource descriptor for the
	 * source buffer — confirm the field layout.
	 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* four SH registers from offset 0x14: one written, three left zero */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* context register offset 0x191 is cleared */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB with NOPs up to a multiple of 8 dwords */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* every buffer the IB references must be in the BO list */
	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;	/* IB size in dwords */
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to finish */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify that the memcpy result matches the source */
		i = 0;
		while(i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		/* hang case: the wait result is ignored, only the reset state counts */
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3629 
/*
 * Run the draw-based memcpy test on one ring.
 *
 * Allocates and zeroes one 4 KiB VRAM buffer for the pixel shader and one for
 * the vertex shader, loads the shader code into them, hands both to
 * amdgpu_memcpy_draw() and finally releases the two buffers.
 *
 * @device_handle: device to allocate on and submit to
 * @ring: ring index forwarded to amdgpu_memcpy_draw()
 * @hang: non-zero selects the PS_HANG pixel shader instead of PS_TEX and is
 *        forwarded to amdgpu_memcpy_draw()
 *
 * CUnit's non-FATAL assertions only record a failure and let execution
 * continue, so allocation failures are additionally handled by bailing out:
 * otherwise the memset() below would write through an uninitialized pointer
 * and the cleanup would free uninitialized handles.
 */
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		return; /* nothing was allocated, nothing to release */
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	if (r)
		goto free_ps; /* only the pixel shader buffer exists */
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

free_ps:
	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
3671 
amdgpu_draw_test(void)3672 static void amdgpu_draw_test(void)
3673 {
3674 	int r;
3675 	struct drm_amdgpu_info_hw_ip info;
3676 	uint32_t ring_id;
3677 
3678 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
3679 	CU_ASSERT_EQUAL(r, 0);
3680 	if (!info.available_rings)
3681 		printf("SKIP ... as there's no graphics ring\n");
3682 
3683 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3684 		amdgpu_memset_draw_test(device_handle, ring_id);
3685 		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
3686 	}
3687 }
3688 
/*
 * Submit a draw-based copy that uses the "hang slow" pixel shader and check
 * that the context reports a reset afterwards.
 *
 * The function creates its own context, allocates the command, shader, source
 * and destination buffers, builds a single indirect buffer (IB), submits it on
 * AMDGPU_HW_IP_GFX using the ring index @ring, waits on the resulting fence and
 * then asserts that amdgpu_cs_query_reset_state() yields
 * AMDGPU_CTX_UNKNOWN_RESET.  Everything allocated here is released again
 * before returning.
 *
 * @device_handle: device to allocate on and submit to (shadows the file-level
 *                 static of the same name)
 * @ring: ring index used for the submission and the fence query
 */
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;		/* 64 MiB, used for both src and dst */
	int bo_shader_ps_size = 0x400000;	/* 4 MiB */
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info= {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	/* family_id from this query is passed to the hang-slow shader loader below. */
	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/*
	 * Command buffer in GTT.  It is zeroed so that the dwords skipped with
	 * "i += 3" further down are submitted as 0.
	 */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	/* Pixel and vertex shader buffers in VRAM, zeroed before loading. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	/* Source and destination buffers in VRAM; only the source is filled. */
	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	/*
	 * Build the IB.  i counts the 32-bit words written so far; each helper
	 * returns the number of words it emitted at ptr_cmd + i.
	 */
	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	/*
	 * NOTE(review): PS_TEX is passed to both write2hw helpers although the
	 * hang-slow shader was loaded above - presumably the register setup is
	 * the same as for the texture shader; confirm.
	 */
	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

	/*
	 * SET_SH_REG packet carrying the source buffer address.  The three
	 * skipped dwords stay 0 from the memset of the command buffer.
	 */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* Second SET_SH_REG packet; again three dwords are left at 0. */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* Pad the IB so its length in dwords is a multiple of 8. */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	/* All five buffers referenced by the IB go into the BO list. */
	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/*
	 * Wait for the IB with no timeout.
	 * NOTE(review): neither the return value nor "expired" is checked; r is
	 * overwritten by the next call.  Presumably deliberate because this
	 * submission is expected to hang - confirm.
	 */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	/* The test passes only if the context reports a reset. */
	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	/* Release everything that was allocated above. */
	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
3850 
amdgpu_gpu_reset_test(void)3851 static void amdgpu_gpu_reset_test(void)
3852 {
3853 	int r;
3854 	char debugfs_path[256], tmp[10];
3855 	int fd;
3856 	struct stat sbuf;
3857 	amdgpu_context_handle context_handle;
3858 	uint32_t hang_state, hangs;
3859 
3860 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3861 	CU_ASSERT_EQUAL(r, 0);
3862 
3863 	r = fstat(drm_amdgpu[0], &sbuf);
3864 	CU_ASSERT_EQUAL(r, 0);
3865 
3866 	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
3867 	fd = open(debugfs_path, O_RDONLY);
3868 	CU_ASSERT(fd >= 0);
3869 
3870 	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
3871 	CU_ASSERT(r > 0);
3872 
3873 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3874 	CU_ASSERT_EQUAL(r, 0);
3875 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3876 
3877 	close(fd);
3878 	r = amdgpu_cs_ctx_free(context_handle);
3879 	CU_ASSERT_EQUAL(r, 0);
3880 
3881 	amdgpu_compute_dispatch_test();
3882 	amdgpu_gfx_dispatch_test();
3883 }
3884