/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>	/* strerror() */
#include <errno.h>	/* errno, EACCES */
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
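
/*
 * Worked examples of the SDMA header encoding above (the values follow
 * directly from the macro definitions; shown here for reference):
 *
 *   SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *     = (0 << 16) | (0 << 8) | (2 << 0)          = 0x00000002
 *
 *   SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, SDMA_CONSTANT_FILL_EXTRA_SIZE(2))
 *     = ((2 << 14) << 16) | (0 << 8) | (11 << 0) = 0x8000000B
 *     (extra-size field 2 selects a DW fill, as noted above)
 */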

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))

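/*
 * Worked example of the PACKET3 encoding above: the NOP submitted by
 * amdgpu_command_submission_compute_nop() below is
 *
 *   PACKET3(PACKET3_NOP, 14) = (3 << 30) | (0x10 << 8) | (14 << 16)
 *                            = 0xC00E1000
 *
 * i.e. a type-3 NOP header followed by 14 padding dwords (16 dwords total).
 * Decoding GFX_COMPUTE_NOP (0xffff1000) with the getters gives type 3,
 * opcode 0x10 (NOP) and the maximum count field 0x3FFF.
 */
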
/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)


#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07


#define SWAP_32(num) ((((num) & 0xff000000) >> 24) | \
		      (((num) & 0x0000ff00) << 8) | \
		      (((num) & 0x00ff0000) >> 8) | \
		      (((num) & 0x000000ff) << 24))
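
/*
 * Worked example: SWAP_32() byte-reverses a 32-bit word, so the first
 * shader_bin[] entry below, SWAP_32(0x800082be), stores 0xbe820080.
 */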

/* Shader code:
 * void main()
 * {
 *	float x = some_input;
 *
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0x7E020204, 0x7E040205,
    0x7E060206, 0x7E080207, 0xE01C2000, 0x80000100,
    0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0xD1FD0000, 0x04010C08, 0xE00C2000, 0x80000100,
    0xBF8C0F70, 0xE01C2000, 0x80010100, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081}, //{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};
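
/*
 * Note (editorial observation, not stated in the original source): the three
 * lengths partition the shader dword array; for memcpy_cs_hang_slow_ai_codes
 * below, 4 header + 3 body + 1 footer = 8 dwords. Presumably the body is
 * replicated to build the long-running "slow hang" shaders.
 */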

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
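
/*
 * Usage sketch for amdgpu_bo_alloc_and_map_raw() (illustrative only; the
 * size and flags here are arbitrary, not taken from a real test):
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	uint64_t mc;
 *	void *cpu;
 *
 *	r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					&bo, &cpu, &mc, &va);
 *	if (r == 0) {
 *		memset(cpu, 0, 4096);	// CPU writes land in the mapped BO
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 *	}
 */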

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: try running this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run a 2x2 loop to test all GTT-flag mapping combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill PM4: test SDMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}


static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}
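
/*
 * Minimal usage sketch for the helper above (mirrors what the NOP tests in
 * this file do; illustrative only, and "some_bo" is a hypothetical handle):
 *
 *	uint32_t pm4[1] = { GFX_COMPUTE_NOP };	// one-dword NOP stream
 *	struct amdgpu_cs_ib_info ib_info = {0};
 *	struct amdgpu_cs_request ibs_request = {0};
 *	amdgpu_bo_handle resources[1] = { some_bo };
 *
 *	amdgpu_test_exec_cs_helper(context_handle, AMDGPU_HW_IP_GFX, 0,
 *				   1, pm4, 1, resources,
 *				   &ib_info, &ibs_request);
 */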

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write-linear result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill PM4: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the const-fill result matches the expected pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}
1558 
amdgpu_command_submission_copy_linear_helper(unsigned ip_type)1559 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1560 {
1561 	const int sdma_write_length = 1024;
1562 	const int pm4_dw = 256;
1563 	amdgpu_context_handle context_handle;
1564 	amdgpu_bo_handle bo1, bo2;
1565 	amdgpu_bo_handle *resources;
1566 	uint32_t *pm4;
1567 	struct amdgpu_cs_ib_info *ib_info;
1568 	struct amdgpu_cs_request *ibs_request;
1569 	uint64_t bo1_mc, bo2_mc;
1570 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1571 	int i, j, r, loop1, loop2, ring_id;
1572 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1573 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1574 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1575 
1576 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1577 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1578 
1579 	ib_info = calloc(1, sizeof(*ib_info));
1580 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1581 
1582 	ibs_request = calloc(1, sizeof(*ibs_request));
1583 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1584 
1585 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1586 	CU_ASSERT_EQUAL(r, 0);
1587 
1588 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1589 	CU_ASSERT_EQUAL(r, 0);
1590 
1591 	/* prepare resources */
1592 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1593 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1594 
1595 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1596 		loop1 = 0;
1597 		/* run 4 rounds to cover all GTT mapping-flag combinations */
1598 		while(loop1 < 2) {
1599 			for (loop2 = 0; loop2 < 2; loop2++) {
1600 				/* allocate UC bo1 for SDMA use */
1601 				r = amdgpu_bo_alloc_and_map(device_handle,
1602 							    sdma_write_length, 4096,
1603 							    AMDGPU_GEM_DOMAIN_GTT,
1604 							    gtt_flags[loop1], &bo1,
1605 							    (void**)&bo1_cpu, &bo1_mc,
1606 							    &bo1_va_handle);
1607 				CU_ASSERT_EQUAL(r, 0);
1608 
1609 				/* set bo1 */
1610 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1611 
1612 				/* allocate UC bo2 for SDMA use */
1613 				r = amdgpu_bo_alloc_and_map(device_handle,
1614 							    sdma_write_length, 4096,
1615 							    AMDGPU_GEM_DOMAIN_GTT,
1616 							    gtt_flags[loop2], &bo2,
1617 							    (void**)&bo2_cpu, &bo2_mc,
1618 							    &bo2_va_handle);
1619 				CU_ASSERT_EQUAL(r, 0);
1620 
1621 				/* clear bo2 */
1622 				memset((void*)bo2_cpu, 0, sdma_write_length);
1623 
1624 				resources[0] = bo1;
1625 				resources[1] = bo2;
1626 
1627 				/* fill in the PM4 packets: test DMA copy linear */
1628 				i = j = 0;
1629 				if (ip_type == AMDGPU_HW_IP_DMA) {
1630 					if (family_id == AMDGPU_FAMILY_SI) {
1631 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1632 									  0, 0, 0,
1633 									  sdma_write_length);
1634 						pm4[i++] = 0xffffffff & bo2_mc;
1635 						pm4[i++] = 0xffffffff & bo1_mc;
1636 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1637 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1638 					} else {
1639 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1640 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1641 								       0);
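						/* gfx9 (AI) and newer SDMA encode the copy count as bytes - 1 */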
1642 						if (family_id >= AMDGPU_FAMILY_AI)
1643 							pm4[i++] = sdma_write_length - 1;
1644 						else
1645 							pm4[i++] = sdma_write_length;
1646 						pm4[i++] = 0;
1647 						pm4[i++] = 0xffffffff & bo1_mc;
1648 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1649 						pm4[i++] = 0xffffffff & bo2_mc;
1650 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1651 					}
1652 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1653 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1654 					if (family_id == AMDGPU_FAMILY_SI) {
1655 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1656 						pm4[i++] = 0xfffffffc & bo1_mc;
1657 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1658 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1659 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1660 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1661 							   (0xffff00000000 & bo1_mc) >> 32;
1662 						pm4[i++] = 0xfffffffc & bo2_mc;
1663 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1664 						pm4[i++] = sdma_write_length;
1665 					} else {
1666 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1667 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1668 							   PACKET3_DMA_DATA_DST_SEL(0) |
1669 							   PACKET3_DMA_DATA_SRC_SEL(0) |
1670 							   PACKET3_DMA_DATA_CP_SYNC;
1671 						pm4[i++] = 0xfffffffc & bo1_mc;
1672 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1673 						pm4[i++] = 0xfffffffc & bo2_mc;
1674 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1675 						pm4[i++] = sdma_write_length;
1676 					}
1677 				}
1678 
1679 				amdgpu_test_exec_cs_helper(context_handle,
1680 							   ip_type, ring_id,
1681 							   i, pm4,
1682 							   2, resources,
1683 							   ib_info, ibs_request);
1684 
1685 				/* verify the copied data matches the 0xaa source pattern */
1686 				i = 0;
1687 				while(i < sdma_write_length) {
1688 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1689 				}
1690 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1691 							     sdma_write_length);
1692 				CU_ASSERT_EQUAL(r, 0);
1693 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1694 							     sdma_write_length);
1695 				CU_ASSERT_EQUAL(r, 0);
1696 
1697 			}
1698 			loop1++;
1699 		}
1700 	}
1701 	/* clean resources */
1702 	free(resources);
1703 	free(ibs_request);
1704 	free(ib_info);
1705 	free(pm4);
1706 
1707 	/* end of test */
1708 	r = amdgpu_cs_ctx_free(context_handle);
1709 	CU_ASSERT_EQUAL(r, 0);
1710 }
1711 
1712 static void amdgpu_command_submission_sdma_copy_linear(void)
1713 {
1714 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1715 }
1716 
1717 static void amdgpu_command_submission_sdma(void)
1718 {
1719 	amdgpu_command_submission_sdma_write_linear();
1720 	amdgpu_command_submission_sdma_const_fill();
1721 	amdgpu_command_submission_sdma_copy_linear();
1722 }
1723 
1724 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1725 {
1726 	amdgpu_context_handle context_handle;
1727 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1728 	void *ib_result_cpu, *ib_result_ce_cpu;
1729 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1730 	struct amdgpu_cs_request ibs_request[2] = {0};
1731 	struct amdgpu_cs_ib_info ib_info[2];
1732 	struct amdgpu_cs_fence fence_status[2] = {0};
1733 	uint32_t *ptr;
1734 	uint32_t expired;
1735 	amdgpu_bo_list_handle bo_list;
1736 	amdgpu_va_handle va_handle, va_handle_ce;
1737 	int r;
1738 	int i = 0, ib_cs_num = 2;
1739 
1740 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1741 	CU_ASSERT_EQUAL(r, 0);
1742 
1743 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1744 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1745 				    &ib_result_handle, &ib_result_cpu,
1746 				    &ib_result_mc_address, &va_handle);
1747 	CU_ASSERT_EQUAL(r, 0);
1748 
1749 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1750 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1751 				    &ib_result_ce_handle, &ib_result_ce_cpu,
1752 				    &ib_result_ce_mc_address, &va_handle_ce);
1753 	CU_ASSERT_EQUAL(r, 0);
1754 
1755 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
1756 			       ib_result_ce_handle, &bo_list);
1757 	CU_ASSERT_EQUAL(r, 0);
1758 
1759 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1760 
1761 	/* IT_SET_CE_DE_COUNTERS */
1762 	ptr = ib_result_ce_cpu;
1763 	if (family_id != AMDGPU_FAMILY_SI) {
1764 		ptr[i++] = 0xc0008900;
1765 		ptr[i++] = 0;
1766 	}
1767 	ptr[i++] = 0xc0008400;
1768 	ptr[i++] = 1;
1769 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1770 	ib_info[0].size = i;
1771 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1772 
1773 	/* IT_WAIT_ON_CE_COUNTER */
1774 	ptr = ib_result_cpu;
1775 	ptr[0] = 0xc0008600;
1776 	ptr[1] = 0x00000001;
1777 	ib_info[1].ib_mc_address = ib_result_mc_address;
1778 	ib_info[1].size = 2;
1779 
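	/* build two identical requests, each carrying the CE + DE IB pair,
	 * so the submission yields two fences to wait on */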
1780 	for (i = 0; i < ib_cs_num; i++) {
1781 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1782 		ibs_request[i].number_of_ibs = 2;
1783 		ibs_request[i].ibs = ib_info;
1784 		ibs_request[i].resources = bo_list;
1785 		ibs_request[i].fence_info.handle = NULL;
1786 	}
1787 
1788 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1789 
1790 	CU_ASSERT_EQUAL(r, 0);
1791 
1792 	for (i = 0; i < ib_cs_num; i++) {
1793 		fence_status[i].context = context_handle;
1794 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1795 		fence_status[i].fence = ibs_request[i].seq_no;
1796 	}
1797 
1798 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1799 				AMDGPU_TIMEOUT_INFINITE,
1800 				&expired, NULL);
1801 	CU_ASSERT_EQUAL(r, 0);
1802 
1803 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1804 				     ib_result_mc_address, 4096);
1805 	CU_ASSERT_EQUAL(r, 0);
1806 
1807 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1808 				     ib_result_ce_mc_address, 4096);
1809 	CU_ASSERT_EQUAL(r, 0);
1810 
1811 	r = amdgpu_bo_list_destroy(bo_list);
1812 	CU_ASSERT_EQUAL(r, 0);
1813 
1814 	r = amdgpu_cs_ctx_free(context_handle);
1815 	CU_ASSERT_EQUAL(r, 0);
1816 }
1817 
1818 static void amdgpu_command_submission_multi_fence(void)
1819 {
1820 	amdgpu_command_submission_multi_fence_wait_all(true);
1821 	amdgpu_command_submission_multi_fence_wait_all(false);
1822 }
1823 
1824 static void amdgpu_userptr_test(void)
1825 {
1826 	int i, r, j;
1827 	uint32_t *pm4 = NULL;
1828 	uint64_t bo_mc;
1829 	void *ptr = NULL;
1830 	int pm4_dw = 256;
1831 	int sdma_write_length = 4;
1832 	amdgpu_bo_handle handle;
1833 	amdgpu_context_handle context_handle;
1834 	struct amdgpu_cs_ib_info *ib_info;
1835 	struct amdgpu_cs_request *ibs_request;
1836 	amdgpu_bo_handle buf_handle;
1837 	amdgpu_va_handle va_handle;
1838 
1839 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1840 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1841 
1842 	ib_info = calloc(1, sizeof(*ib_info));
1843 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1844 
1845 	ibs_request = calloc(1, sizeof(*ibs_request));
1846 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1847 
1848 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1849 	CU_ASSERT_EQUAL(r, 0);
1850 
1851 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1852 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
1853 	memset(ptr, 0, BUFFER_SIZE);
1854 
1855 	r = amdgpu_create_bo_from_user_mem(device_handle,
1856 					   ptr, BUFFER_SIZE, &buf_handle);
1857 	CU_ASSERT_EQUAL(r, 0);
1858 
1859 	r = amdgpu_va_range_alloc(device_handle,
1860 				  amdgpu_gpu_va_range_general,
1861 				  BUFFER_SIZE, 1, 0, &bo_mc,
1862 				  &va_handle, 0);
1863 	CU_ASSERT_EQUAL(r, 0);
1864 
1865 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1866 	CU_ASSERT_EQUAL(r, 0);
1867 
1868 	handle = buf_handle;
1869 
1870 	j = i = 0;
1871 
1872 	if (family_id == AMDGPU_FAMILY_SI)
1873 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1874 				sdma_write_length);
1875 	else
1876 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1877 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1878 	pm4[i++] = 0xffffffff & bo_mc;
1879 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1880 	if (family_id >= AMDGPU_FAMILY_AI)
1881 		pm4[i++] = sdma_write_length - 1;
1882 	else if (family_id != AMDGPU_FAMILY_SI)
1883 		pm4[i++] = sdma_write_length;
1884 
1885 	while (j++ < sdma_write_length)
1886 		pm4[i++] = 0xdeadbeaf;
1887 
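	/* fork a child that dirties pm4[0]: the resulting copy-on-write
	 * must not corrupt the parent's command buffer or the userptr BO */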
1888 	if (!fork()) {
1889 		pm4[0] = 0x0;
1890 		exit(0);
1891 	}
1892 
1893 	amdgpu_test_exec_cs_helper(context_handle,
1894 				   AMDGPU_HW_IP_DMA, 0,
1895 				   i, pm4,
1896 				   1, &handle,
1897 				   ib_info, ibs_request);
1898 	i = 0;
1899 	while (i < sdma_write_length) {
1900 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
1901 	}
1902 	free(ibs_request);
1903 	free(ib_info);
1904 	free(pm4);
1905 
1906 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1907 	CU_ASSERT_EQUAL(r, 0);
1908 	r = amdgpu_va_range_free(va_handle);
1909 	CU_ASSERT_EQUAL(r, 0);
1910 	r = amdgpu_bo_free(buf_handle);
1911 	CU_ASSERT_EQUAL(r, 0);
1912 	free(ptr);
1913 
1914 	r = amdgpu_cs_ctx_free(context_handle);
1915 	CU_ASSERT_EQUAL(r, 0);
1916 
1917 	wait(NULL);
1918 }
1919 
1920 static void amdgpu_sync_dependency_test(void)
1921 {
1922 	amdgpu_context_handle context_handle[2];
1923 	amdgpu_bo_handle ib_result_handle;
1924 	void *ib_result_cpu;
1925 	uint64_t ib_result_mc_address;
1926 	struct amdgpu_cs_request ibs_request;
1927 	struct amdgpu_cs_ib_info ib_info;
1928 	struct amdgpu_cs_fence fence_status;
1929 	uint32_t expired;
1930 	int i, j, r;
1931 	amdgpu_bo_list_handle bo_list;
1932 	amdgpu_va_handle va_handle;
1933 	static uint32_t *ptr;
1934 	uint64_t seq_no;
1935 
1936 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
1937 	CU_ASSERT_EQUAL(r, 0);
1938 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
1939 	CU_ASSERT_EQUAL(r, 0);
1940 
1941 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
1942 			AMDGPU_GEM_DOMAIN_GTT, 0,
1943 						    &ib_result_handle, &ib_result_cpu,
1944 						    &ib_result_mc_address, &va_handle);
1945 	CU_ASSERT_EQUAL(r, 0);
1946 
1947 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1948 			       &bo_list);
1949 	CU_ASSERT_EQUAL(r, 0);
1950 
1951 	ptr = ib_result_cpu;
1952 	i = 0;
1953 
1954 	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
1955 
1956 	/* Dispatch minimal init config and verify it's executed */
1957 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
1958 	ptr[i++] = 0x80000000;
1959 	ptr[i++] = 0x80000000;
1960 
1961 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
1962 	ptr[i++] = 0x80000000;
1963 
1964 
1965 	/* Program compute regs */
1966 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1967 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1968 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
1969 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
1970 
1971 
1972 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
1973 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
1974 	/*
1975 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
1976 	                                      SGPRS = 1
1977 	                                      PRIORITY = 0
1978 	                                      FLOAT_MODE = 192 (0xc0)
1979 	                                      PRIV = 0
1980 	                                      DX10_CLAMP = 1
1981 	                                      DEBUG_MODE = 0
1982 	                                      IEEE_MODE = 0
1983 	                                      BULKY = 0
1984 	                                      CDBG_USER = 0
1985 	 *
1986 	 */
1987 	ptr[i++] = 0x002c0040;
1988 
1989 
1990 	/*
1991 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
1992 	                                      USER_SGPR = 8
1993 	                                      TRAP_PRESENT = 0
1994 	                                      TGID_X_EN = 0
1995 	                                      TGID_Y_EN = 0
1996 	                                      TGID_Z_EN = 0
1997 	                                      TG_SIZE_EN = 0
1998 	                                      TIDIG_COMP_CNT = 0
1999 	                                      EXCP_EN_MSB = 0
2000 	                                      LDS_SIZE = 0
2001 	                                      EXCP_EN = 0
2002 	 *
2003 	 */
2004 	ptr[i++] = 0x00000010;
2005 
2006 
2007 	/*
2008 	 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2009 	 *                                          WAVESIZE = 0
2010 	 *
2011 	 */
2012 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2013 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2014 	ptr[i++] = 0x00000100;
2015 
2016 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2017 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2018 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2019 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2020 
2021 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2022 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2023 	ptr[i++] = 0;
2024 
2025 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2026 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2027 	ptr[i++] = 1;
2028 	ptr[i++] = 1;
2029 	ptr[i++] = 1;
2030 
2031 
2032 	/* Dispatch */
2033 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2034 	ptr[i++] = 1;
2035 	ptr[i++] = 1;
2036 	ptr[i++] = 1;
2037 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2038 
2039 
2040 	while (i & 7)
2041 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2042 
2043 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2044 	ib_info.ib_mc_address = ib_result_mc_address;
2045 	ib_info.size = i;
2046 
2047 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2048 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2049 	ibs_request.ring = 0;
2050 	ibs_request.number_of_ibs = 1;
2051 	ibs_request.ibs = &ib_info;
2052 	ibs_request.resources = bo_list;
2053 	ibs_request.fence_info.handle = NULL;
2054 
2055 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2056 	CU_ASSERT_EQUAL(r, 0);
2057 	seq_no = ibs_request.seq_no;
2058 
2059 
2060 
2061 	/* Prepare second command with dependency on the first */
2062 	j = i;
2063 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2064 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2065 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2066 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2067 	ptr[i++] = 99;
2068 
2069 	while (i & 7)
2070 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2071 
2072 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2073 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2074 	ib_info.size = i - j;
2075 
2076 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2077 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2078 	ibs_request.ring = 0;
2079 	ibs_request.number_of_ibs = 1;
2080 	ibs_request.ibs = &ib_info;
2081 	ibs_request.resources = bo_list;
2082 	ibs_request.fence_info.handle = NULL;
2083 
2084 	ibs_request.number_of_dependencies = 1;
2085 
2086 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2087 	ibs_request.dependencies[0].context = context_handle[1];
2088 	ibs_request.dependencies[0].ip_instance = 0;
2089 	ibs_request.dependencies[0].ring = 0;
2090 	ibs_request.dependencies[0].fence = seq_no;
2091 
2092 
2093 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2094 	CU_ASSERT_EQUAL(r, 0);
2095 
2096 
2097 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2098 	fence_status.context = context_handle[0];
2099 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2100 	fence_status.ip_instance = 0;
2101 	fence_status.ring = 0;
2102 	fence_status.fence = ibs_request.seq_no;
2103 
2104 	r = amdgpu_cs_query_fence_status(&fence_status,
2105 		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2106 	CU_ASSERT_EQUAL(r, 0);
2107 
2108 	/* Expect the second command to wait for shader to complete */
2109 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2110 
2111 	r = amdgpu_bo_list_destroy(bo_list);
2112 	CU_ASSERT_EQUAL(r, 0);
2113 
2114 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2115 				     ib_result_mc_address, 4096);
2116 	CU_ASSERT_EQUAL(r, 0);
2117 
2118 	r = amdgpu_cs_ctx_free(context_handle[0]);
2119 	CU_ASSERT_EQUAL(r, 0);
2120 	r = amdgpu_cs_ctx_free(context_handle[1]);
2121 	CU_ASSERT_EQUAL(r, 0);
2122 
2123 	free(ibs_request.dependencies);
2124 }
2125 
2126 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2127 {
2128 	struct amdgpu_test_shader *shader;
2129 	int i, loop = 0x10000;
2130 
2131 	switch (family) {
2132 		case AMDGPU_FAMILY_AI:
2133 			shader = &memcpy_cs_hang_slow_ai;
2134 			break;
2135 		case AMDGPU_FAMILY_RV:
2136 			shader = &memcpy_cs_hang_slow_rv;
2137 			break;
2138 		default:
2139 			return -1;
2140 			break;
2141 	}
2142 
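	/* assemble a long-running shader: header, body repeated 'loop' times, footer */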
2143 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2144 
2145 	for (i = 0; i < loop; i++)
2146 		memcpy(ptr + shader->header_length + shader->body_length * i,
2147 			shader->shader + shader->header_length,
2148 			shader->body_length * sizeof(uint32_t));
2149 
2150 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2151 		shader->shader + shader->header_length + shader->body_length,
2152 		shader->foot_length * sizeof(uint32_t));
2153 
2154 	return 0;
2155 }
2156 
2157 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2158 					   int cs_type)
2159 {
2160 	uint32_t shader_size;
2161 	const uint32_t *shader;
2162 
2163 	switch (cs_type) {
2164 		case CS_BUFFERCLEAR:
2165 			shader = bufferclear_cs_shader_gfx9;
2166 			shader_size = sizeof(bufferclear_cs_shader_gfx9);
2167 			break;
2168 		case CS_BUFFERCOPY:
2169 			shader = buffercopy_cs_shader_gfx9;
2170 			shader_size = sizeof(buffercopy_cs_shader_gfx9);
2171 			break;
2172 		case CS_HANG:
2173 			shader = memcpy_ps_hang;
2174 			shader_size = sizeof(memcpy_ps_hang);
2175 			break;
2176 		default:
2177 			return -1;
2178 			break;
2179 	}
2180 
2181 	memcpy(ptr, shader, shader_size);
2182 	return 0;
2183 }
2184 
2185 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type)
2186 {
2187 	int i = 0;
2188 
2189 	/* Write context control and load shadowing register if necessary */
2190 	if (ip_type == AMDGPU_HW_IP_GFX) {
2191 		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2192 		ptr[i++] = 0x80000000;
2193 		ptr[i++] = 0x80000000;
2194 	}
2195 
2196 	/* Issue commands to set default compute state. */
2197 	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2198 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2199 	ptr[i++] = 0x204;
2200 	i += 3;
2201 
2202 	/* clear mmCOMPUTE_TMPRING_SIZE */
2203 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2204 	ptr[i++] = 0x218;
2205 	ptr[i++] = 0;
2206 
2207 	return i;
2208 }
2209 
2210 static int amdgpu_dispatch_write_cumask(uint32_t *ptr)
2211 {
2212 	int i = 0;
2213 
2214 	/*  Issue commands to set cu mask used in current dispatch */
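	/* all-ones masks enable every CU on every shader engine */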
2215 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2216 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2217 	ptr[i++] = 0x216;
2218 	ptr[i++] = 0xffffffff;
2219 	ptr[i++] = 0xffffffff;
2220 	/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2221 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2222 	ptr[i++] = 0x219;
2223 	ptr[i++] = 0xffffffff;
2224 	ptr[i++] = 0xffffffff;
2225 
2226 	return i;
2227 }
2228 
2229 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr)
2230 {
2231 	int i, j;
2232 
2233 	i = 0;
2234 
2235 	/* Writes shader state to HW */
2236 	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2237 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2238 	ptr[i++] = 0x20c;
2239 	ptr[i++] = (shader_addr >> 8);
2240 	ptr[i++] = (shader_addr >> 40);
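	/* COMPUTE_PGM_LO/HI take the shader VA in 256-byte units */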
2241 	/* write sh regs*/
2242 	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2243 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2244 		/* - Gfx9ShRegBase */
2245 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2246 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2247 	}
2248 
2249 	return i;
2250 }
2251 
2252 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2253 					 uint32_t ip_type,
2254 					 uint32_t ring)
2255 {
2256 	amdgpu_context_handle context_handle;
2257 	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2258 	volatile unsigned char *ptr_dst;
2259 	void *ptr_shader;
2260 	uint32_t *ptr_cmd;
2261 	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2262 	amdgpu_va_handle va_dst, va_shader, va_cmd;
2263 	int i, r;
2264 	int bo_dst_size = 16384;
2265 	int bo_shader_size = 4096;
2266 	int bo_cmd_size = 4096;
2267 	struct amdgpu_cs_request ibs_request = {0};
2268 	struct amdgpu_cs_ib_info ib_info = {0};
2269 	amdgpu_bo_list_handle bo_list;
2270 	struct amdgpu_cs_fence fence_status = {0};
2271 	uint32_t expired;
2272 
2273 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2274 	CU_ASSERT_EQUAL(r, 0);
2275 
2276 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2277 					AMDGPU_GEM_DOMAIN_GTT, 0,
2278 					&bo_cmd, (void **)&ptr_cmd,
2279 					&mc_address_cmd, &va_cmd);
2280 	CU_ASSERT_EQUAL(r, 0);
2281 	memset(ptr_cmd, 0, bo_cmd_size);
2282 
2283 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2284 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2285 					&bo_shader, &ptr_shader,
2286 					&mc_address_shader, &va_shader);
2287 	CU_ASSERT_EQUAL(r, 0);
2288 	memset(ptr_shader, 0, bo_shader_size);
2289 
2290 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR);
2291 	CU_ASSERT_EQUAL(r, 0);
2292 
2293 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2294 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2295 					&bo_dst, (void **)&ptr_dst,
2296 					&mc_address_dst, &va_dst);
2297 	CU_ASSERT_EQUAL(r, 0);
2298 
2299 	i = 0;
2300 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2301 
2302 	/*  Issue commands to set cu mask used in current dispatch */
2303 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2304 
2305 	/* Writes shader state to HW */
2306 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2307 
2308 	/* Write constant data */
2309 	/* Writes the UAV constant data to the SGPRs. */
2310 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2311 	ptr_cmd[i++] = 0x240;
2312 	ptr_cmd[i++] = mc_address_dst;
2313 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2314 	ptr_cmd[i++] = 0x400;
2315 	ptr_cmd[i++] = 0x74fac;
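	/* the four dwords above are a gfx9 buffer descriptor (V#) handed to the shader through user SGPRs */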
2316 
2317 	/* Sets a range of pixel shader constants */
2318 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2319 	ptr_cmd[i++] = 0x244;
2320 	ptr_cmd[i++] = 0x22222222;
2321 	ptr_cmd[i++] = 0x22222222;
2322 	ptr_cmd[i++] = 0x22222222;
2323 	ptr_cmd[i++] = 0x22222222;
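	/* the fill value: every destination byte should read back as 0x22 */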
2324 
2325 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2326 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2327 	ptr_cmd[i++] = 0x215;
2328 	ptr_cmd[i++] = 0;
2329 
2330 	/* dispatch direct command */
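	/* DIM_X = 0x10 thread groups, DIM_Y = DIM_Z = 1, then the dispatch initiator */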
2331 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2332 	ptr_cmd[i++] = 0x10;
2333 	ptr_cmd[i++] = 1;
2334 	ptr_cmd[i++] = 1;
2335 	ptr_cmd[i++] = 1;
2336 
2337 	while (i & 7)
2338 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2339 
2340 	resources[0] = bo_dst;
2341 	resources[1] = bo_shader;
2342 	resources[2] = bo_cmd;
2343 	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2344 	CU_ASSERT_EQUAL(r, 0);
2345 
2346 	ib_info.ib_mc_address = mc_address_cmd;
2347 	ib_info.size = i;
2348 	ibs_request.ip_type = ip_type;
2349 	ibs_request.ring = ring;
2350 	ibs_request.resources = bo_list;
2351 	ibs_request.number_of_ibs = 1;
2352 	ibs_request.ibs = &ib_info;
2353 	ibs_request.fence_info.handle = NULL;
2354 
2355 	/* submit CS */
2356 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2357 	CU_ASSERT_EQUAL(r, 0);
2358 
2359 	r = amdgpu_bo_list_destroy(bo_list);
2360 	CU_ASSERT_EQUAL(r, 0);
2361 
2362 	fence_status.ip_type = ip_type;
2363 	fence_status.ip_instance = 0;
2364 	fence_status.ring = ring;
2365 	fence_status.context = context_handle;
2366 	fence_status.fence = ibs_request.seq_no;
2367 
2368 	/* wait for the IB to complete */
2369 	r = amdgpu_cs_query_fence_status(&fence_status,
2370 					 AMDGPU_TIMEOUT_INFINITE,
2371 					 0, &expired);
2372 	CU_ASSERT_EQUAL(r, 0);
2373 	CU_ASSERT_EQUAL(expired, true);
2374 
2375 	/* verify the memset result matches the expected 0x22 pattern */
2376 	i = 0;
2377 	while(i < bo_dst_size) {
2378 		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2379 	}
2380 
2381 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2382 	CU_ASSERT_EQUAL(r, 0);
2383 
2384 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2385 	CU_ASSERT_EQUAL(r, 0);
2386 
2387 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2388 	CU_ASSERT_EQUAL(r, 0);
2389 
2390 	r = amdgpu_cs_ctx_free(context_handle);
2391 	CU_ASSERT_EQUAL(r, 0);
2392 }
2393 
2394 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2395 					uint32_t ip_type,
2396 					uint32_t ring,
2397 					int hang)
2398 {
2399 	amdgpu_context_handle context_handle;
2400 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2401 	volatile unsigned char *ptr_dst;
2402 	void *ptr_shader;
2403 	unsigned char *ptr_src;
2404 	uint32_t *ptr_cmd;
2405 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2406 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2407 	int i, r;
2408 	int bo_dst_size = 16384;
2409 	int bo_shader_size = 4096;
2410 	int bo_cmd_size = 4096;
2411 	struct amdgpu_cs_request ibs_request = {0};
2412 	struct amdgpu_cs_ib_info ib_info = {0};
2413 	uint32_t expired, hang_state, hangs;
2414 	enum cs_type cs_type;
2415 	amdgpu_bo_list_handle bo_list;
2416 	struct amdgpu_cs_fence fence_status = {0};
2417 
2418 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2419 	CU_ASSERT_EQUAL(r, 0);
2420 
2421 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2422 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2423 				    &bo_cmd, (void **)&ptr_cmd,
2424 				    &mc_address_cmd, &va_cmd);
2425 	CU_ASSERT_EQUAL(r, 0);
2426 	memset(ptr_cmd, 0, bo_cmd_size);
2427 
2428 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2429 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2430 					&bo_shader, &ptr_shader,
2431 					&mc_address_shader, &va_shader);
2432 	CU_ASSERT_EQUAL(r, 0);
2433 	memset(ptr_shader, 0, bo_shader_size);
2434 
2435 	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2436 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type);
2437 	CU_ASSERT_EQUAL(r, 0);
2438 
2439 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2440 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2441 					&bo_src, (void **)&ptr_src,
2442 					&mc_address_src, &va_src);
2443 	CU_ASSERT_EQUAL(r, 0);
2444 
2445 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2446 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2447 					&bo_dst, (void **)&ptr_dst,
2448 					&mc_address_dst, &va_dst);
2449 	CU_ASSERT_EQUAL(r, 0);
2450 
2451 	memset(ptr_src, 0x55, bo_dst_size);
2452 
2453 	i = 0;
2454 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2455 
2456 	/*  Issue commands to set cu mask used in current dispatch */
2457 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2458 
2459 	/* Writes shader state to HW */
2460 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2461 
2462 	/* Write constant data */
2463 	/* Writes the texture resource constants data to the SGPRs */
2464 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2465 	ptr_cmd[i++] = 0x240;
2466 	ptr_cmd[i++] = mc_address_src;
2467 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2468 	ptr_cmd[i++] = 0x400;
2469 	ptr_cmd[i++] = 0x74fac;
2470 
2471 	/* Writes the UAV constant data to the SGPRs. */
2472 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2473 	ptr_cmd[i++] = 0x244;
2474 	ptr_cmd[i++] = mc_address_dst;
2475 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2476 	ptr_cmd[i++] = 0x400;
2477 	ptr_cmd[i++] = 0x74fac;
2478 
2479 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2480 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2481 	ptr_cmd[i++] = 0x215;
2482 	ptr_cmd[i++] = 0;
2483 
2484 	/* dispatch direct command */
2485 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2486 	ptr_cmd[i++] = 0x10;
2487 	ptr_cmd[i++] = 1;
2488 	ptr_cmd[i++] = 1;
2489 	ptr_cmd[i++] = 1;
2490 
2491 	while (i & 7)
2492 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2493 
2494 	resources[0] = bo_shader;
2495 	resources[1] = bo_src;
2496 	resources[2] = bo_dst;
2497 	resources[3] = bo_cmd;
2498 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2499 	CU_ASSERT_EQUAL(r, 0);
2500 
2501 	ib_info.ib_mc_address = mc_address_cmd;
2502 	ib_info.size = i;
2503 	ibs_request.ip_type = ip_type;
2504 	ibs_request.ring = ring;
2505 	ibs_request.resources = bo_list;
2506 	ibs_request.number_of_ibs = 1;
2507 	ibs_request.ibs = &ib_info;
2508 	ibs_request.fence_info.handle = NULL;
2509 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2510 	CU_ASSERT_EQUAL(r, 0);
2511 
2512 	fence_status.ip_type = ip_type;
2513 	fence_status.ip_instance = 0;
2514 	fence_status.ring = ring;
2515 	fence_status.context = context_handle;
2516 	fence_status.fence = ibs_request.seq_no;
2517 
2518 	/* wait for the IB to complete */
2519 	r = amdgpu_cs_query_fence_status(&fence_status,
2520 					 AMDGPU_TIMEOUT_INFINITE,
2521 					 0, &expired);
2522 
2523 	if (!hang) {
2524 		CU_ASSERT_EQUAL(r, 0);
2525 		CU_ASSERT_EQUAL(expired, true);
2526 
2527 		/* verify the memcpy result matches the source buffer */
2528 		i = 0;
2529 		while(i < bo_dst_size) {
2530 			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2531 			i++;
2532 		}
2533 	} else {
2534 		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2535 		CU_ASSERT_EQUAL(r, 0);
2536 		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2537 	}
2538 
2539 	r = amdgpu_bo_list_destroy(bo_list);
2540 	CU_ASSERT_EQUAL(r, 0);
2541 
2542 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2543 	CU_ASSERT_EQUAL(r, 0);
2544 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2545 	CU_ASSERT_EQUAL(r, 0);
2546 
2547 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2548 	CU_ASSERT_EQUAL(r, 0);
2549 
2550 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2551 	CU_ASSERT_EQUAL(r, 0);
2552 
2553 	r = amdgpu_cs_ctx_free(context_handle);
2554 	CU_ASSERT_EQUAL(r, 0);
2555 }
2556 
2557 static void amdgpu_compute_dispatch_test(void)
2558 {
2559 	int r;
2560 	struct drm_amdgpu_info_hw_ip info;
2561 	uint32_t ring_id;
2562 
2563 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2564 	CU_ASSERT_EQUAL(r, 0);
2565 	if (!info.available_rings)
2566 		printf("SKIP ... as there's no compute ring\n");
2567 
2568 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2569 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id);
2570 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0);
2571 	}
2572 }
2573 
2574 static void amdgpu_gfx_dispatch_test(void)
2575 {
2576 	int r;
2577 	struct drm_amdgpu_info_hw_ip info;
2578 	uint32_t ring_id;
2579 
2580 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2581 	CU_ASSERT_EQUAL(r, 0);
2582 	if (!info.available_rings)
2583 		printf("SKIP ... as there's no graphics ring\n");
2584 
2585 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2586 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id);
2587 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0);
2588 	}
2589 }
2590 
2591 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2592 {
2593 	int r;
2594 	struct drm_amdgpu_info_hw_ip info;
2595 	uint32_t ring_id;
2596 
2597 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2598 	CU_ASSERT_EQUAL(r, 0);
2599 	if (!info.available_rings)
2600 		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2601 
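	/* submit a good IB, a hanging IB, then a good IB again: the ring must recover after the GPU reset */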
2602 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2603 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2604 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1);
2605 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2606 	}
2607 }
2608 
2609 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
2610 						  uint32_t ip_type, uint32_t ring)
2611 {
2612 	amdgpu_context_handle context_handle;
2613 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2614 	volatile unsigned char *ptr_dst;
2615 	void *ptr_shader;
2616 	unsigned char *ptr_src;
2617 	uint32_t *ptr_cmd;
2618 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2619 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2620 	int i, r;
2621 	int bo_dst_size = 0x4000000;
2622 	int bo_shader_size = 0x400000;
2623 	int bo_cmd_size = 4096;
2624 	struct amdgpu_cs_request ibs_request = {0};
2625 	struct amdgpu_cs_ib_info ib_info = {0};
2626 	uint32_t hang_state, hangs, expired;
2627 	struct amdgpu_gpu_info gpu_info = {0};
2628 	amdgpu_bo_list_handle bo_list;
2629 	struct amdgpu_cs_fence fence_status = {0};
2630 
2631 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
2632 	CU_ASSERT_EQUAL(r, 0);
2633 
2634 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2635 	CU_ASSERT_EQUAL(r, 0);
2636 
2637 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2638 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2639 				    &bo_cmd, (void **)&ptr_cmd,
2640 				    &mc_address_cmd, &va_cmd);
2641 	CU_ASSERT_EQUAL(r, 0);
2642 	memset(ptr_cmd, 0, bo_cmd_size);
2643 
2644 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2645 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2646 					&bo_shader, &ptr_shader,
2647 					&mc_address_shader, &va_shader);
2648 	CU_ASSERT_EQUAL(r, 0);
2649 	memset(ptr_shader, 0, bo_shader_size);
2650 
2651 	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
2652 	CU_ASSERT_EQUAL(r, 0);
2653 
2654 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2655 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2656 					&bo_src, (void **)&ptr_src,
2657 					&mc_address_src, &va_src);
2658 	CU_ASSERT_EQUAL(r, 0);
2659 
2660 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2661 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2662 					&bo_dst, (void **)&ptr_dst,
2663 					&mc_address_dst, &va_dst);
2664 	CU_ASSERT_EQUAL(r, 0);
2665 
2666 	memset(ptr_src, 0x55, bo_dst_size);
2667 
2668 	i = 0;
2669 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type);
2670 
2671 	/*  Issue commands to set cu mask used in current dispatch */
2672 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i);
2673 
2674 	/* Writes shader state to HW */
2675 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader);
2676 
2677 	/* Write constant data */
2678 	/* Writes the texture resource constants data to the SGPRs */
2679 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2680 	ptr_cmd[i++] = 0x240;
2681 	ptr_cmd[i++] = mc_address_src;
2682 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2683 	ptr_cmd[i++] = 0x400000;
2684 	ptr_cmd[i++] = 0x74fac;
2685 
2686 	/* Writes the UAV constant data to the SGPRs. */
2687 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2688 	ptr_cmd[i++] = 0x244;
2689 	ptr_cmd[i++] = mc_address_dst;
2690 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2691 	ptr_cmd[i++] = 0x400000;
2692 	ptr_cmd[i++] = 0x74fac;
2693 
2694 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2695 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2696 	ptr_cmd[i++] = 0x215;
2697 	ptr_cmd[i++] = 0;
2698 
2699 	/* dispatch direct command */
2700 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2701 	ptr_cmd[i++] = 0x10000;
2702 	ptr_cmd[i++] = 1;
2703 	ptr_cmd[i++] = 1;
2704 	ptr_cmd[i++] = 1;
2705 
2706 	while (i & 7)
2707 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2708 
2709 	resources[0] = bo_shader;
2710 	resources[1] = bo_src;
2711 	resources[2] = bo_dst;
2712 	resources[3] = bo_cmd;
2713 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2714 	CU_ASSERT_EQUAL(r, 0);
2715 
2716 	ib_info.ib_mc_address = mc_address_cmd;
2717 	ib_info.size = i;
2718 	ibs_request.ip_type = ip_type;
2719 	ibs_request.ring = ring;
2720 	ibs_request.resources = bo_list;
2721 	ibs_request.number_of_ibs = 1;
2722 	ibs_request.ibs = &ib_info;
2723 	ibs_request.fence_info.handle = NULL;
2724 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2725 	CU_ASSERT_EQUAL(r, 0);
2726 
2727 	fence_status.ip_type = ip_type;
2728 	fence_status.ip_instance = 0;
2729 	fence_status.ring = ring;
2730 	fence_status.context = context_handle;
2731 	fence_status.fence = ibs_request.seq_no;
2732 
2733 	/* wait for the IB to complete */
2734 	r = amdgpu_cs_query_fence_status(&fence_status,
2735 					 AMDGPU_TIMEOUT_INFINITE,
2736 					 0, &expired);
2737 
2738 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2739 	CU_ASSERT_EQUAL(r, 0);
2740 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2741 
2742 	r = amdgpu_bo_list_destroy(bo_list);
2743 	CU_ASSERT_EQUAL(r, 0);
2744 
2745 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2746 	CU_ASSERT_EQUAL(r, 0);
2747 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2748 	CU_ASSERT_EQUAL(r, 0);
2749 
2750 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2751 	CU_ASSERT_EQUAL(r, 0);
2752 
2753 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2754 	CU_ASSERT_EQUAL(r, 0);
2755 
2756 	r = amdgpu_cs_ctx_free(context_handle);
2757 	CU_ASSERT_EQUAL(r, 0);
2758 }
2759 
2760 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2761 {
2762 	int r;
2763 	struct drm_amdgpu_info_hw_ip info;
2764 	uint32_t ring_id;
2765 
2766 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
2767 	CU_ASSERT_EQUAL(r, 0);
2768 	if (!info.available_rings)
2769 		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
2770 
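	/* same good/hang/good pattern, using the slow long-running-shader hang */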
2771 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2772 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2773 		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id);
2774 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0);
2775 	}
2776 }
2777 
2778 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
2779 {
2780 	struct amdgpu_test_shader *shader;
2781 	int i, loop = 0x40000;
2782 
2783 	switch (family) {
2784 		case AMDGPU_FAMILY_AI:
2785 		case AMDGPU_FAMILY_RV:
2786 			shader = &memcpy_ps_hang_slow_ai;
2787 			break;
2788 		default:
2789 			return -1;
2790 			break;
2791 	}
2792 
2793 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2794 
2795 	for (i = 0; i < loop; i++)
2796 		memcpy(ptr + shader->header_length + shader->body_length * i,
2797 			shader->shader + shader->header_length,
2798 			shader->body_length * sizeof(uint32_t));
2799 
2800 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2801 		shader->shader + shader->header_length + shader->body_length,
2802 		shader->foot_length * sizeof(uint32_t));
2803 
2804 	return 0;
2805 }
2806 
2807 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type)
2808 {
2809 	int i;
2810 	uint32_t shader_offset = 256;
2811 	uint32_t mem_offset, patch_code_offset;
2812 	uint32_t shader_size, patchinfo_code_size;
2813 	const uint32_t *shader;
2814 	const uint32_t *patchinfo_code;
2815 	const uint32_t *patchcode_offset;
2816 
2817 	switch (ps_type) {
2818 		case PS_CONST:
2819 			shader = ps_const_shader_gfx9;
2820 			shader_size = sizeof(ps_const_shader_gfx9);
2821 			patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
2822 			patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
2823 			patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
2824 			break;
2825 		case PS_TEX:
2826 			shader = ps_tex_shader_gfx9;
2827 			shader_size = sizeof(ps_tex_shader_gfx9);
2828 			patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
2829 			patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
2830 			patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
2831 			break;
2832 		case PS_HANG:
2833 			shader = memcpy_ps_hang;
2834 			shader_size = sizeof(memcpy_ps_hang);
2835 
2836 			memcpy(ptr, shader, shader_size);
2837 			return 0;
2838 		default:
2839 			return -1;
2840 			break;
2841 	}
2842 
2843 	/* write main shader program */
2844 	for (i = 0; i < 10; i++) {
2845 		mem_offset = i * shader_offset;
2846 		memcpy(ptr + mem_offset, shader, shader_size);
2847 	}
2848 
2849 	/* overwrite patch codes */
2850 	for (i = 0; i < 10; i++) {
2851 		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
2852 		patch_code_offset = i * patchinfo_code_size;
2853 		memcpy(ptr + mem_offset,
2854 			patchinfo_code + patch_code_offset,
2855 			patchinfo_code_size * sizeof(uint32_t));
2856 	}
2857 
2858 	return 0;
2859 }
2860 
2861 /* load RectPosTexFast_VS */
2862 static int amdgpu_draw_load_vs_shader(uint8_t *ptr)
2863 {
2864 	const uint32_t *shader;
2865 	uint32_t shader_size;
2866 
2867 	shader = vs_RectPosTexFast_shader_gfx9;
2868 	shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
2869 
2870 	memcpy(ptr, shader, shader_size);
2871 
2872 	return 0;
2873 }
2874 
2875 static int amdgpu_draw_init(uint32_t *ptr)
2876 {
2877 	int i = 0;
2878 	const uint32_t *preamblecache_ptr;
2879 	uint32_t preamblecache_size;
2880 
2881 	/* Write context control and load shadowing register if necessary */
2882 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2883 	ptr[i++] = 0x80000000;
2884 	ptr[i++] = 0x80000000;
2885 
2886 	preamblecache_ptr = preamblecache_gfx9;
2887 	preamblecache_size = sizeof(preamblecache_gfx9);
2888 
2889 	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
2890 	return i + preamblecache_size/sizeof(uint32_t);
2891 }
2892 
2893 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
2894 							 uint64_t dst_addr,
2895 							 int hang_slow)
2896 {
2897 	int i = 0;
2898 
2899 	/* setup color buffer */
2900 	/* offset   reg
2901 	   0xA318   CB_COLOR0_BASE
2902 	   0xA319   CB_COLOR0_BASE_EXT
2903 	   0xA31A   CB_COLOR0_ATTRIB2
2904 	   0xA31B   CB_COLOR0_VIEW
2905 	   0xA31C   CB_COLOR0_INFO
2906 	   0xA31D   CB_COLOR0_ATTRIB
2907 	   0xA31E   CB_COLOR0_DCC_CONTROL
2908 	   0xA31F   CB_COLOR0_CMASK
2909 	   0xA320   CB_COLOR0_CMASK_BASE_EXT
2910 	   0xA321   CB_COLOR0_FMASK
2911 	   0xA322   CB_COLOR0_FMASK_BASE_EXT
2912 	   0xA323   CB_COLOR0_CLEAR_WORD0
2913 	   0xA324   CB_COLOR0_CLEAR_WORD1
2914 	   0xA325   CB_COLOR0_DCC_BASE
2915 	   0xA326   CB_COLOR0_DCC_BASE_EXT */
2916 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
2917 	ptr[i++] = 0x318;
2918 	ptr[i++] = dst_addr >> 8;
2919 	ptr[i++] = dst_addr >> 40;
2920 	ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f;
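	/* the ATTRIB2 value above encodes the mip0 width/height; the slow-hang case uses a much larger surface */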
2921 	ptr[i++] = 0;
2922 	ptr[i++] = 0x50438;
2923 	ptr[i++] = 0x10140000;
2924 	i += 9;
2925 
2926 	/* mmCB_MRT0_EPITCH */
2927 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2928 	ptr[i++] = 0x1e8;
2929 	ptr[i++] = hang_slow ? 0x7ff : 0x1f;
2930 
2931 	/* 0xA32B   CB_COLOR1_BASE */
2932 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2933 	ptr[i++] = 0x32b;
2934 	ptr[i++] = 0;
2935 
2936 	/* 0xA33A   CB_COLOR1_BASE */
2937 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2938 	ptr[i++] = 0x33a;
2939 	ptr[i++] = 0;
2940 
2941 	/* SPI_SHADER_COL_FORMAT */
2942 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2943 	ptr[i++] = 0x1c5;
2944 	ptr[i++] = 9;
2945 
2946 	/* Setup depth buffer */
2947 	/* mmDB_Z_INFO */
2948 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2949 	ptr[i++] = 0xe;
2950 	i += 2;
2951 
2952 	return i;
2953 }
2954 
2955 static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow)
2956 {
2957 	int i = 0;
2958 	const uint32_t *cached_cmd_ptr;
2959 	uint32_t cached_cmd_size;
2960 
2961 	/* mmPA_SC_TILE_STEERING_OVERRIDE */
2962 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2963 	ptr[i++] = 0xd7;
2964 	ptr[i++] = 0;
2965 
2966 	ptr[i++] = 0xffff1000;
2967 	ptr[i++] = 0xc0021000;
2968 
2969 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
2970 	ptr[i++] = 0xd7;
2971 	ptr[i++] = 1;
2972 
2973 	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
2974 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
2975 	ptr[i++] = 0x2fe;
2976 	i += 16;
2977 
2978 	/* mmPA_SC_CENTROID_PRIORITY_0 */
2979 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
2980 	ptr[i++] = 0x2f5;
2981 	i += 2;
2982 
2983 	cached_cmd_ptr = cached_cmd_gfx9;
2984 	cached_cmd_size = sizeof(cached_cmd_gfx9);
2985 
2986 	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
2987 	if (hang_slow)
2988 		*(ptr + i + 12) = 0x8000800;
2989 	i += cached_cmd_size/sizeof(uint32_t);
2990 
2991 	return i;
2992 }
2993 
2994 static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
2995 						  int ps_type,
2996 						  uint64_t shader_addr,
2997 						  int hang_slow)
2998 {
2999 	int i = 0;
3000 
3001 	/* mmPA_CL_VS_OUT_CNTL */
3002 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3003 	ptr[i++] = 0x207;
3004 	ptr[i++] = 0;
3005 
3006 	/* mmSPI_SHADER_PGM_RSRC3_VS */
3007 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3008 	ptr[i++] = 0x46;
3009 	ptr[i++] = 0xffff;
3010 
3011 	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3012 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3013 	ptr[i++] = 0x48;
3014 	ptr[i++] = shader_addr >> 8;
3015 	ptr[i++] = shader_addr >> 40;
3016 
3017 	/* mmSPI_SHADER_PGM_RSRC1_VS */
3018 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3019 	ptr[i++] = 0x4a;
3020 	ptr[i++] = 0xc0081;
3021 	/* mmSPI_SHADER_PGM_RSRC2_VS */
3022 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3023 	ptr[i++] = 0x4b;
3024 	ptr[i++] = 0x18;
3025 
3026 	/* mmSPI_VS_OUT_CONFIG */
3027 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3028 	ptr[i++] = 0x1b1;
3029 	ptr[i++] = 2;
3030 
3031 	/* mmSPI_SHADER_POS_FORMAT */
3032 	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3033 	ptr[i++] = 0x1c3;
3034 	ptr[i++] = 4;
3035 
3036 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3037 	ptr[i++] = 0x4c;
3038 	i += 2;
3039 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3040 	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
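	/* 0x42000000 = 32.0f; the slow-hang case uses 0x45000000 = 2048.0f to match its larger render target */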
3041 
3042 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3043 	ptr[i++] = 0x50;
3044 	i += 2;
3045 	if (ps_type == PS_CONST) {
3046 		i += 2;
3047 	} else if (ps_type == PS_TEX) {
3048 		ptr[i++] = 0x3f800000;
3049 		ptr[i++] = 0x3f800000;
3050 	}
3051 
3052 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3053 	ptr[i++] = 0x54;
3054 	i += 4;
3055 
3056 	return i;
3057 }
3058 
3059 static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3060 				   int ps_type,
3061 				   uint64_t shader_addr)
3062 {
3063 	int i, j;
3064 	const uint32_t *sh_registers;
3065 	const uint32_t *context_registers;
3066 	uint32_t num_sh_reg, num_context_reg;
3067 
3068 	if (ps_type == PS_CONST) {
3069 		sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3070 		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3071 		num_sh_reg = ps_num_sh_registers_gfx9;
3072 		num_context_reg = ps_num_context_registers_gfx9;
3073 	} else if (ps_type == PS_TEX) {
3074 		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3075 		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3076 		num_sh_reg = ps_num_sh_registers_gfx9;
3077 		num_context_reg = ps_num_context_registers_gfx9;
3078 	}
3079 
3080 	i = 0;
3081 
3082 	/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3083 	   0x2c08   SPI_SHADER_PGM_LO_PS
3084 	   0x2c09   SPI_SHADER_PGM_HI_PS */
3085 	shader_addr += 256 * 9;
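	/* point SPI_SHADER_PGM_LO/HI at the last of the ten 256-byte shader copies written by amdgpu_draw_load_ps_shader() */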
3086 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3087 	ptr[i++] = 0x7;
3088 	ptr[i++] = 0xffff;
3089 	ptr[i++] = shader_addr >> 8;
3090 	ptr[i++] = shader_addr >> 40;
3091 
3092 	for (j = 0; j < num_sh_reg; j++) {
3093 		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3094 		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3095 		ptr[i++] = sh_registers[j * 2 + 1];
3096 	}
3097 
3098 	for (j = 0; j < num_context_reg; j++) {
3099 		if (context_registers[j * 2] != 0xA1C5) {
3100 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3101 			ptr[i++] = context_registers[j * 2] - 0xa000;
3102 			ptr[i++] = context_registers[j * 2 + 1];
3103 		}
3104 
3105 		if (context_registers[j * 2] == 0xA1B4) {
3106 			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3107 			ptr[i++] = 0x1b3;
3108 			ptr[i++] = 2;
3109 		}
3110 	}
3111 
3112 	return i;
3113 }
3114 
3115 static int amdgpu_draw_draw(uint32_t *ptr)
3116 {
3117 	int i = 0;
3118 
3119 	/* mmIA_MULTI_VGT_PARAM */
3120 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3121 	ptr[i++] = 0x40000258;
3122 	ptr[i++] = 0xd00ff;
3123 
3124 	/* mmVGT_PRIMITIVE_TYPE */
3125 	ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3126 	ptr[i++] = 0x10000242;
3127 	ptr[i++] = 0x11;
3128 
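	/* draw 3 vertices; draw initiator 2 selects auto-generated indices (DI_SRC_SEL_AUTO_INDEX) */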
3129 	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3130 	ptr[i++] = 3;
3131 	ptr[i++] = 2;
3132 
3133 	return i;
3134 }
3135 
3136 void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3137 			amdgpu_bo_handle bo_shader_ps,
3138 			amdgpu_bo_handle bo_shader_vs,
3139 			uint64_t mc_address_shader_ps,
3140 			uint64_t mc_address_shader_vs,
3141 			uint32_t ring_id)
3142 {
3143 	amdgpu_context_handle context_handle;
3144 	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3145 	volatile unsigned char *ptr_dst;
3146 	uint32_t *ptr_cmd;
3147 	uint64_t mc_address_dst, mc_address_cmd;
3148 	amdgpu_va_handle va_dst, va_cmd;
3149 	int i, r;
3150 	int bo_dst_size = 16384;
3151 	int bo_cmd_size = 4096;
3152 	struct amdgpu_cs_request ibs_request = {0};
3153 	struct amdgpu_cs_ib_info ib_info = {0};
3154 	struct amdgpu_cs_fence fence_status = {0};
3155 	uint32_t expired;
3156 	amdgpu_bo_list_handle bo_list;
3157 
3158 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3159 	CU_ASSERT_EQUAL(r, 0);
3160 
3161 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3162 					AMDGPU_GEM_DOMAIN_GTT, 0,
3163 					&bo_cmd, (void **)&ptr_cmd,
3164 					&mc_address_cmd, &va_cmd);
3165 	CU_ASSERT_EQUAL(r, 0);
3166 	memset(ptr_cmd, 0, bo_cmd_size);
3167 
3168 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3169 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3170 					&bo_dst, (void **)&ptr_dst,
3171 					&mc_address_dst, &va_dst);
3172 	CU_ASSERT_EQUAL(r, 0);
3173 
3174 	i = 0;
3175 	i += amdgpu_draw_init(ptr_cmd + i);
3176 
3177 	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);
3178 
3179 	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);
3180 
3181 	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0);
3182 
3183 	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps);
3184 
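	/* PS constants: the clear color; every destination byte should read back as 0x33 */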
3185 	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3186 	ptr_cmd[i++] = 0xc;
3187 	ptr_cmd[i++] = 0x33333333;
3188 	ptr_cmd[i++] = 0x33333333;
3189 	ptr_cmd[i++] = 0x33333333;
3190 	ptr_cmd[i++] = 0x33333333;
3191 
3192 	i += amdgpu_draw_draw(ptr_cmd + i);
3193 
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify the memset result: every destination byte must be 0x33 */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

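/*
 * Allocate and load the PS_CONST pixel shader and the vertex shader, run the
 * memset draw on the given ring, then release the shader BOs.
 */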
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

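/*
 * Copy a 16 KiB buffer (filled with 0x55) by sampling it as a texture in the
 * pixel shader (PS_TEX) and rendering into the destination. When hang is
 * non-zero the caller has loaded a hanging pixel shader instead, so rather
 * than verifying the copy we expect the context to report a reset.
 */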
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

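	/* texture resource descriptor (T#) for the source buffer; word 0
	 * holds the 256-byte-aligned base address (mc_address_src >> 8) */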
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x7c01f;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0x3e000;
	i += 3;

	/* sampler descriptor (S#) for the texture fetch */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* mmSPI_PS_INPUT_CNTL_0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify the memcpy result: destination must match the source */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

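/*
 * Set up the vertex shader and either the texturing (PS_TEX) or the
 * intentionally hanging (PS_HANG) pixel shader, then run the memcpy draw.
 * Left non-static so the hang/reset tests can call it with hang = 1.
 */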
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs, ring, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

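/*
 * CUnit entry point: run the memset and memcpy draw tests on every
 * available GFX ring.
 */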
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

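/*
 * Slow-hang variant of the memcpy draw: sample a 64 MiB source buffer with
 * an oversized (4 MiB) pixel shader loaded by
 * amdgpu_draw_load_ps_shader_hang_slow(), then expect the context to report
 * a GPU reset rather than verifying the copy.
 */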
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps);

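	/* T# descriptor as in amdgpu_memcpy_draw(), but with size/pitch
	 * fields covering the larger 64 MiB source surface */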
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = mc_address_src >> 8;
	ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
	ptr_cmd[i++] = 0x1ffc7ff;
	ptr_cmd[i++] = 0x90500fac;
	ptr_cmd[i++] = 0xffe000;
	i += 3;

	/* sampler descriptor (S#) for the texture fetch */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* mmSPI_PS_INPUT_CNTL_0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i);

	/* pad the IB to an 8-dword boundary */
	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete; its result is not asserted because
	 * a hang and GPU reset are expected here */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

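/*
 * Trigger a GPU reset through the amdgpu_gpu_recover debugfs entry, verify
 * that an existing context observes the reset, then re-run the dispatch
 * tests to confirm the GPU recovered.
 */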
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

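	/* the debugfs node lives under the DRM minor of the opened device
	 * node; reading it forces a GPU recovery */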
	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* make sure the GPU came back: re-run the dispatch tests */
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}