/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static  amdgpu_device_handle device_handle;
static  uint32_t  major_version;
static  uint32_t  minor_version;
static  uint32_t  family_id;
static  uint32_t  chip_id;
static  uint32_t  chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test",  amdgpu_query_info_test },
	{ "Userptr Test",  amdgpu_userptr_test },
	{ "bo eviction Test",  amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test",  amdgpu_semaphore_test },
	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
	{ "Draw Test",  amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
#define	SDMA_OPCODE_WRITE				  2
#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
#       define SDMA_WRITE_SUB_OPCODE_TILED                1

#define	SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR                0

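/*
 * Example: the linear-copy packet as the eviction test below assembles it
 * (post-SI layout; src_mc/dst_mc stand in for the GPU virtual addresses):
 *
 *	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
 *			       SDMA_COPY_SUB_OPCODE_LINEAR, 0);
 *	pm4[i++] = sdma_write_length - 1;  // AI+; older parts take the plain length
 *	pm4[i++] = 0;
 *	pm4[i++] = 0xffffffff & src_mc;
 *	pm4[i++] = (0xffffffff00000000 & src_mc) >> 32;
 *	pm4[i++] = 0xffffffff & dst_mc;
 *	pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 */
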
#define	SDMA_OPCODE_ATOMIC				  10
#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
		/* 0 - non-TMZ.
		 * 1 - TMZ.
		 */
#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
		 * same as Packet 3
		 */

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
			 ((reg) & 0xFFFF) |			\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2			0x80000000
#define		PACKET2_PAD_SHIFT		0
#define		PACKET2_PAD_MASK		(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
			 (((op) & 0xFF) << 8) |				\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
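
/*
 * Example: PACKET3(PACKET3_NOP, 0x3fff) encodes to
 * (3 << 30) | (0x10 << 8) | (0x3fff << 16) == 0xffff1000,
 * which is exactly GFX_COMPUTE_NOP above.
 */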

/* Packet 3 types */
#define	PACKET3_NOP					0x10

#define	PACKET3_WRITE_DATA				0x37
#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR                             (1 << 16)
#define		WR_CONFIRM                              (1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

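/*
 * Sketch: a CP WRITE_DATA packet storing `count` DWs of a fill pattern
 * through the async-memory path (DST_SEL = 5). `count`, `dst_mc` and the
 * pm4 cursor are placeholders for whatever the caller uses; the
 * write-linear helper (truncated in this excerpt) builds packets along
 * these lines:
 *
 *	pm4[i++] = PACKET3(PACKET3_WRITE_DATA, count + 2);
 *	pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 *	pm4[i++] = 0xffffffff & dst_mc;
 *	pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 *	while (count--)
 *		pm4[i++] = 0xdeadbeaf;
 */
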
#define	PACKET3_ATOMIC_MEM				0x1E
#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
            /* 0 - single_pass_atomic.
             * 1 - loop_until_compare_satisfied.
             */
#define     ATOMIC_MEM_CACHEPOLICY(x)           ((x) << 25)
            /* 0 - lru.
             * 1 - stream.
             */
#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
            /* 0 - micro_engine.
             */

#define	PACKET3_DMA_DATA				0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
/* COMMAND */
#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
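
/*
 * Sketch: a const fill through DMA_DATA along the lines the const-fill
 * helper (not shown in this excerpt) builds it. SRC_SEL(2) makes the
 * SRC_ADDR_LO slot carry immediate DATA; dst_mc and byte_count are
 * placeholders:
 *
 *	pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
 *	pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
 *		   PACKET3_DMA_DATA_DST_SEL(0) |
 *		   PACKET3_DMA_DATA_SRC_SEL(2) |
 *		   PACKET3_DMA_DATA_CP_SYNC;
 *	pm4[i++] = 0xdeadbeaf;				// DATA
 *	pm4[i++] = 0;					// SRC_ADDR_HI (unused)
 *	pm4[i++] = 0xffffffff & dst_mc;			// DST_ADDR_LO
 *	pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;	// DST_ADDR_HI
 *	pm4[i++] = byte_count;				// COMMAND | BYTE_COUNT
 */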

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |		\
						(((t) & 0x1) << 23) |		\
						(((s) & 0x1) << 22) |		\
						(((cnt) & 0xFFFFF) << 0))
#define	SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define	PACKET3_DMA_DATA_SI	0x41
#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)

#define PKT3_CONTEXT_CONTROL                   0x28
#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE                       0x12

#define PKT3_SET_SH_REG                        0x76
#define		PACKET3_SET_SH_REG_START			0x00002c00

#define PKT3_SET_SH_REG_INDEX			0x9B

#define	PACKET3_DISPATCH_DIRECT				0x15
#define PACKET3_EVENT_WRITE				0x46
#define PACKET3_ACQUIRE_MEM				0x58
#define PACKET3_SET_CONTEXT_REG				0x69
#define PACKET3_SET_UCONFIG_REG				0x79
#define PACKET3_DRAW_INDEX_AUTO				0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO                                                        0x2e0c
#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
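
/*
 * Example: SWAP_32(0x11223344) == 0x44332211 -- a full byte reversal,
 * used below to store the shader binary byte-swapped.
 */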

/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *	u[0] = 42u;
 * }
 */

static  uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
    0xbf810000
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
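
/*
 * Sketch: how a register/value table like the one above would be emitted
 * with SET_SH_REG (the dispatch tests, outside this excerpt, do the
 * equivalent); `ptr`, `i` and `j` are placeholders:
 *
 *	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
 *		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
 *		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0]
 *			   - PACKET3_SET_SH_REG_START;
 *		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
 *	}
 */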

static const uint32_t buffercopy_cs_shader_gfx9[] = {
    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
    0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
    0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_const_shader_gfx10[] = {
    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
    0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
    0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
     { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
     { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
    }
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
    0x00000004
};

static const uint32_t ps_num_sh_registers_gfx10 = 2;

static const uint32_t ps_const_sh_registers_gfx10[][2] = {
    {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
    0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
    0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
    }
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
};

static const uint32_t ps_tex_shader_gfx10[] = {
    0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
    0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
    0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
    0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
    0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
    0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
     { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
     { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
    }
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
    0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
    0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
    0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
    0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
    0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
    0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
    0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
    0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
    0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
        0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};
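
/*
 * Reading of the fields above (an interpretation; the consuming code is
 * outside this excerpt): header_length/body_length/foot_length partition
 * `shader` into a prologue, a repeatable body and an epilogue, so the
 * hang-slow tests can replicate the body words to build a long-running
 * shader.
 */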

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
        memcpy_cs_hang_slow_ai_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
        memcpy_cs_hang_slow_rv_codes,
        4,
        3,
        1
};

unsigned int memcpy_cs_hang_slow_nv_codes[] = {
    0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
    0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
        memcpy_cs_hang_slow_nv_codes,
        4,
        3,
        1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
        0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
        memcpy_ps_hang_slow_ai_codes,
        7,
        2,
        9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				   AMDGPU_VM_PAGE_READABLE |
				   AMDGPU_VM_PAGE_WRITEABLE |
				   AMDGPU_VM_PAGE_EXECUTABLE |
				   mapping_flags,
				   AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

 error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

 error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

 error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
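
/*
 * Example usage (sketch): allocate and map a 4 KiB write-combined GTT
 * buffer; the variable names are illustrative only.
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *	int r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					    AMDGPU_GEM_DOMAIN_GTT,
 *					    AMDGPU_GEM_CREATE_CPU_GTT_USWC, 0,
 *					    &bo, &cpu, &mc, &va);
 */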

CU_BOOL suite_basic_tests_enable(void)
{
	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the gfx-engine basic test cases on ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (GFX)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Command submission Test (Multi-Fence)",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					"Sync dependency Test",
					CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				   &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
				"Hint: try running this test program as root.",
				strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* loop over all mapping-flag combinations for bo1 and bo2 */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy produced the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engines */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different contexts */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request; this submits the command stream described in ibs_request
 * and waits for the IB to complete.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,amdgpu_context_handle context_handle,unsigned ip_type,int instance,int pm4_dw,uint32_t * pm4_src,int res_cnt,amdgpu_bo_handle * resources,struct amdgpu_cs_ib_info * ib_info,struct amdgpu_cs_request * ibs_request,bool secure)1452 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1453 			       amdgpu_context_handle context_handle,
1454 			       unsigned ip_type, int instance, int pm4_dw,
1455 			       uint32_t *pm4_src, int res_cnt,
1456 			       amdgpu_bo_handle *resources,
1457 			       struct amdgpu_cs_ib_info *ib_info,
1458 			       struct amdgpu_cs_request *ibs_request,
1459 			       bool secure)
1460 {
1461 	int r;
1462 	uint32_t expired;
1463 	uint32_t *ring_ptr;
1464 	amdgpu_bo_handle ib_result_handle;
1465 	void *ib_result_cpu;
1466 	uint64_t ib_result_mc_address;
1467 	struct amdgpu_cs_fence fence_status = {0};
1468 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1469 	amdgpu_va_handle va_handle;
1470 
1471 	/* prepare CS */
1472 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1473 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1474 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1475 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1476 	CU_ASSERT_TRUE(pm4_dw <= 1024);
1477 
1478 	/* allocate IB */
1479 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1480 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1481 				    &ib_result_handle, &ib_result_cpu,
1482 				    &ib_result_mc_address, &va_handle);
1483 	CU_ASSERT_EQUAL(r, 0);
1484 
1485 	/* copy PM4 packet to ring from caller */
1486 	ring_ptr = ib_result_cpu;
1487 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1488 
1489 	ib_info->ib_mc_address = ib_result_mc_address;
1490 	ib_info->size = pm4_dw;
1491 	if (secure)
1492 		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1493 
1494 	ibs_request->ip_type = ip_type;
1495 	ibs_request->ring = instance;
1496 	ibs_request->number_of_ibs = 1;
1497 	ibs_request->ibs = ib_info;
1498 	ibs_request->fence_info.handle = NULL;
1499 
1500 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1501 	all_res[res_cnt] = ib_result_handle;
1502 
1503 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1504 				  NULL, &ibs_request->resources);
1505 	CU_ASSERT_EQUAL(r, 0);
1506 
1507 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1508 
1509 	/* submit CS */
1510 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1511 	CU_ASSERT_EQUAL(r, 0);
1512 
1513 	r = amdgpu_bo_list_destroy(ibs_request->resources);
1514 	CU_ASSERT_EQUAL(r, 0);
1515 
1516 	fence_status.ip_type = ip_type;
1517 	fence_status.ip_instance = 0;
1518 	fence_status.ring = ibs_request->ring;
1519 	fence_status.context = context_handle;
1520 	fence_status.fence = ibs_request->seq_no;
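	/* a fence is identified by (context, ip_type, ip_instance, ring,
	 * seq_no); amdgpu_cs_submit filled in seq_no above */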
1521 
1522 	/* wait for the IB to complete */
1523 	r = amdgpu_cs_query_fence_status(&fence_status,
1524 					 AMDGPU_TIMEOUT_INFINITE,
1525 					 0, &expired);
1526 	CU_ASSERT_EQUAL(r, 0);
1527 	CU_ASSERT_EQUAL(expired, true);
1528 
1529 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1530 				     ib_result_mc_address, 4096);
1531 	CU_ASSERT_EQUAL(r, 0);
1532 }
1533 
1534 static void
1535 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1536 			   unsigned ip_type, int instance, int pm4_dw,
1537 			   uint32_t *pm4_src, int res_cnt,
1538 			   amdgpu_bo_handle *resources,
1539 			   struct amdgpu_cs_ib_info *ib_info,
1540 			   struct amdgpu_cs_request *ibs_request)
1541 {
1542 	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1543 				       ip_type, instance, pm4_dw, pm4_src,
1544 				       res_cnt, resources, ib_info,
1545 				       ibs_request, false);
1546 }
1547 
1548 void
1549 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1550 							  device, unsigned
1551 							  ip_type, bool secure)
1552 {
1553 	const int sdma_write_length = 128;
1554 	const int pm4_dw = 256;
1555 	amdgpu_context_handle context_handle;
1556 	amdgpu_bo_handle bo;
1557 	amdgpu_bo_handle *resources;
1558 	uint32_t *pm4;
1559 	struct amdgpu_cs_ib_info *ib_info;
1560 	struct amdgpu_cs_request *ibs_request;
1561 	uint64_t bo_mc;
1562 	volatile uint32_t *bo_cpu;
1563 	uint32_t bo_cpu_origin;
1564 	int i, j, r, loop, ring_id;
1565 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1566 	amdgpu_va_handle va_handle;
1567 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1568 
1569 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1570 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1571 
1572 	ib_info = calloc(1, sizeof(*ib_info));
1573 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1574 
1575 	ibs_request = calloc(1, sizeof(*ibs_request));
1576 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1577 
1578 	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1579 	CU_ASSERT_EQUAL(r, 0);
1580 
1581 	for (i = 0; secure && (i < 2); i++)
1582 		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
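	/* encrypted BOs are placed in TMZ-protected memory, which only
	 * secure (TMZ) submissions are expected to access */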
1583 
1584 	r = amdgpu_cs_ctx_create(device, &context_handle);
1585 
1586 	CU_ASSERT_EQUAL(r, 0);
1587 
1588 	/* prepare resource */
1589 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1590 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1591 
1592 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1593 		loop = 0;
1594 		while(loop < 2) {
1595 			/* allocate UC bo for sDMA use */
1596 			r = amdgpu_bo_alloc_and_map(device,
1597 						    sdma_write_length * sizeof(uint32_t),
1598 						    4096, AMDGPU_GEM_DOMAIN_GTT,
1599 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1600 						    &bo_mc, &va_handle);
1601 			CU_ASSERT_EQUAL(r, 0);
1602 
1603 			/* clear bo */
1604 			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1605 
1606 			resources[0] = bo;
1607 
1608 			/* fill PM4: test DMA write-linear */
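			/* SDMA write-linear layout: header, dst address lo/hi, dword
			 * count (encoded as length - 1 on AI and newer), then the payload */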
1609 			i = j = 0;
1610 			if (ip_type == AMDGPU_HW_IP_DMA) {
1611 				if (family_id == AMDGPU_FAMILY_SI)
1612 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1613 								  sdma_write_length);
1614 				else
1615 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1616 							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1617 							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1618 				pm4[i++] = 0xfffffffc & bo_mc;
1619 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1620 				if (family_id >= AMDGPU_FAMILY_AI)
1621 					pm4[i++] = sdma_write_length - 1;
1622 				else if (family_id != AMDGPU_FAMILY_SI)
1623 					pm4[i++] = sdma_write_length;
1624 				while(j++ < sdma_write_length)
1625 					pm4[i++] = 0xdeadbeaf;
1626 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1627 				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1628 				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1629 				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1630 				pm4[i++] = 0xfffffffc & bo_mc;
1631 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1632 				while(j++ < sdma_write_length)
1633 					pm4[i++] = 0xdeadbeaf;
1634 			}
1635 
1636 			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1637 						       ip_type, ring_id, i, pm4,
1638 						       1, resources, ib_info,
1639 						       ibs_request, secure);
1640 
1641 			/* verify that the write-linear result matches the expected pattern */
1642 			i = 0;
1643 			if (!secure) {
1644 				while(i < sdma_write_length) {
1645 					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1646 				}
1647 			} else if (ip_type == AMDGPU_HW_IP_GFX) {
1648 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1649 				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1650 				/* atomic opcode: 32-bit with RTN, ATOMIC_SWAPCMP_RTN
1651 				 * command: 1 - loop until the compare is satisfied
1652 				 * single_pass_atomic: 0 - lru
1653 				 * engine_sel: 0 - micro_engine
1654 				 */
1655 				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1656 							ATOMIC_MEM_COMMAND(1) |
1657 							ATOMIC_MEM_CACHEPOLICAY(0) |
1658 							ATOMIC_MEM_ENGINESEL(0));
1659 				pm4[i++] = 0xfffffffc & bo_mc;
1660 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1661 				pm4[i++] = 0x12345678;
1662 				pm4[i++] = 0x0;
1663 				pm4[i++] = 0xdeadbeaf;
1664 				pm4[i++] = 0x0;
1665 				pm4[i++] = 0x100;
1666 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1667 							ip_type, ring_id, i, pm4,
1668 							1, resources, ib_info,
1669 							ibs_request, true);
1670 			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1671 				/* restore the bo_cpu to compare */
1672 				bo_cpu_origin = bo_cpu[0];
1673 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1674 				/* atomic opcode: 32-bit with RTN, ATOMIC_SWAPCMP_RTN
1675 				 * loop: 1 - loop until the compare is satisfied
1676 				 * single_pass_atomic: 0 - lru
1677 				 */
1678 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1679 							       0,
1680 							       SDMA_ATOMIC_LOOP(1) |
1681 							       SDMA_ATOMIC_TMZ(1) |
1682 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1683 				pm4[i++] = 0xfffffffc & bo_mc;
1684 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1685 				pm4[i++] = 0x12345678;
1686 				pm4[i++] = 0x0;
1687 				pm4[i++] = 0xdeadbeaf;
1688 				pm4[i++] = 0x0;
1689 				pm4[i++] = 0x100;
1690 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1691 							ip_type, ring_id, i, pm4,
1692 							1, resources, ib_info,
1693 							ibs_request, true);
1694 				/* The DMA engine's atomic behavior differs from GFX.
1695 				 * When the compare data does not equal the destination data:
1696 				 * GFX loops again until the GFX timeout hits (system hang),
1697 				 * while DMA loops again until a timer expires and then raises an
1698 				 * interrupt, so the test case cannot rely on interrupts.
1699 				 * We verify differently instead: if the compare data does not
1700 				 * equal the destination data, the source data is written to the
1701 				 * destination buffer; otherwise the destination is unchanged.
1702 				 * So if the bo_cpu data has been overwritten, the test passes.
1703 				 */
1704 				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1705 
1706 				/* compare again for the case of dest_data != cmp_data */
1707 				i = 0;
1708 				/* restore again; dest_data should now be 0x12345678 */
1709 				bo_cpu_origin = bo_cpu[0];
1710 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1711 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1712 							       0,
1713 							       SDMA_ATOMIC_LOOP(1) |
1714 							       SDMA_ATOMIC_TMZ(1) |
1715 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1716 				pm4[i++] = 0xfffffffc & bo_mc;
1717 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1718 				pm4[i++] = 0x87654321;
1719 				pm4[i++] = 0x0;
1720 				pm4[i++] = 0xdeadbeaf;
1721 				pm4[i++] = 0x0;
1722 				pm4[i++] = 0x100;
1723 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1724 							ip_type, ring_id, i, pm4,
1725 							1, resources, ib_info,
1726 							ibs_request, true);
1727 				/* bo_cpu[0] should be unchanged, still 0x12345678; otherwise the test failed */
1728 				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1729 			}
1730 
1731 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1732 						     sdma_write_length * sizeof(uint32_t));
1733 			CU_ASSERT_EQUAL(r, 0);
1734 			loop++;
1735 		}
1736 	}
1737 	/* clean resources */
1738 	free(resources);
1739 	free(ibs_request);
1740 	free(ib_info);
1741 	free(pm4);
1742 
1743 	/* end of test */
1744 	r = amdgpu_cs_ctx_free(context_handle);
1745 	CU_ASSERT_EQUAL(r, 0);
1746 }
1747 
1748 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1749 {
1750 	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1751 								  ip_type,
1752 								  false);
1753 }
1754 
1755 static void amdgpu_command_submission_sdma_write_linear(void)
1756 {
1757 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1758 }
1759 
1760 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1761 {
1762 	const int sdma_write_length = 1024 * 1024;
1763 	const int pm4_dw = 256;
1764 	amdgpu_context_handle context_handle;
1765 	amdgpu_bo_handle bo;
1766 	amdgpu_bo_handle *resources;
1767 	uint32_t *pm4;
1768 	struct amdgpu_cs_ib_info *ib_info;
1769 	struct amdgpu_cs_request *ibs_request;
1770 	uint64_t bo_mc;
1771 	volatile uint32_t *bo_cpu;
1772 	int i, j, r, loop, ring_id;
1773 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1774 	amdgpu_va_handle va_handle;
1775 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1776 
1777 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1778 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1779 
1780 	ib_info = calloc(1, sizeof(*ib_info));
1781 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1782 
1783 	ibs_request = calloc(1, sizeof(*ibs_request));
1784 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1785 
1786 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1787 	CU_ASSERT_EQUAL(r, 0);
1788 
1789 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1790 	CU_ASSERT_EQUAL(r, 0);
1791 
1792 	/* prepare resource */
1793 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1794 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1795 
1796 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1797 		loop = 0;
1798 		while(loop < 2) {
1799 			/* allocate UC bo for sDMA use */
1800 			r = amdgpu_bo_alloc_and_map(device_handle,
1801 						    sdma_write_length, 4096,
1802 						    AMDGPU_GEM_DOMAIN_GTT,
1803 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1804 						    &bo_mc, &va_handle);
1805 			CU_ASSERT_EQUAL(r, 0);
1806 
1807 			/* clear bo */
1808 			memset((void*)bo_cpu, 0, sdma_write_length);
1809 
1810 			resources[0] = bo;
1811 
1812 			/* fill PM4: test DMA const fill */
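			/* SDMA const fill layout: header, dst address lo/hi, the 32-bit
			 * fill value, then a byte count (length - 1 on AI and newer);
			 * the SI variant encodes a dword count in the packet header instead */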
1813 			i = j = 0;
1814 			if (ip_type == AMDGPU_HW_IP_DMA) {
1815 				if (family_id == AMDGPU_FAMILY_SI) {
1816 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1817 								  0, 0, 0,
1818 								  sdma_write_length / 4);
1819 					pm4[i++] = 0xfffffffc & bo_mc;
1820 					pm4[i++] = 0xdeadbeaf;
1821 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1822 				} else {
1823 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1824 							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1825 					pm4[i++] = 0xffffffff & bo_mc;
1826 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1827 					pm4[i++] = 0xdeadbeaf;
1828 					if (family_id >= AMDGPU_FAMILY_AI)
1829 						pm4[i++] = sdma_write_length - 1;
1830 					else
1831 						pm4[i++] = sdma_write_length;
1832 				}
1833 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1834 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1835 				if (family_id == AMDGPU_FAMILY_SI) {
1836 					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1837 					pm4[i++] = 0xdeadbeaf;
1838 					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1839 						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1840 						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1841 						   PACKET3_DMA_DATA_SI_CP_SYNC;
1842 					pm4[i++] = 0xffffffff & bo_mc;
1843 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1844 					pm4[i++] = sdma_write_length;
1845 				} else {
1846 					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1847 					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1848 						   PACKET3_DMA_DATA_DST_SEL(0) |
1849 						   PACKET3_DMA_DATA_SRC_SEL(2) |
1850 						   PACKET3_DMA_DATA_CP_SYNC;
1851 					pm4[i++] = 0xdeadbeaf;
1852 					pm4[i++] = 0;
1853 					pm4[i++] = 0xfffffffc & bo_mc;
1854 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1855 					pm4[i++] = sdma_write_length;
1856 				}
1857 			}
1858 
1859 			amdgpu_test_exec_cs_helper(context_handle,
1860 						   ip_type, ring_id,
1861 						   i, pm4,
1862 						   1, resources,
1863 						   ib_info, ibs_request);
1864 
1865 			/* verify that the const-fill result matches the expected pattern */
1866 			i = 0;
1867 			while(i < (sdma_write_length / 4)) {
1868 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1869 			}
1870 
1871 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1872 						     sdma_write_length);
1873 			CU_ASSERT_EQUAL(r, 0);
1874 			loop++;
1875 		}
1876 	}
1877 	/* clean resources */
1878 	free(resources);
1879 	free(ibs_request);
1880 	free(ib_info);
1881 	free(pm4);
1882 
1883 	/* end of test */
1884 	r = amdgpu_cs_ctx_free(context_handle);
1885 	CU_ASSERT_EQUAL(r, 0);
1886 }
1887 
1888 static void amdgpu_command_submission_sdma_const_fill(void)
1889 {
1890 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1891 }
1892 
1893 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1894 {
1895 	const int sdma_write_length = 1024;
1896 	const int pm4_dw = 256;
1897 	amdgpu_context_handle context_handle;
1898 	amdgpu_bo_handle bo1, bo2;
1899 	amdgpu_bo_handle *resources;
1900 	uint32_t *pm4;
1901 	struct amdgpu_cs_ib_info *ib_info;
1902 	struct amdgpu_cs_request *ibs_request;
1903 	uint64_t bo1_mc, bo2_mc;
1904 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1905 	int i, j, r, loop1, loop2, ring_id;
1906 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1907 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1908 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1909 
1910 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1911 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1912 
1913 	ib_info = calloc(1, sizeof(*ib_info));
1914 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1915 
1916 	ibs_request = calloc(1, sizeof(*ibs_request));
1917 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1918 
1919 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1920 	CU_ASSERT_EQUAL(r, 0);
1921 
1922 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1923 	CU_ASSERT_EQUAL(r, 0);
1924 
1925 	/* prepare resource */
1926 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1927 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1928 
1929 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1930 		loop1 = loop2 = 0;
1931 		/* run four rounds to test every mapping combination; loop2 must
1931 		 * be reset per outer round or only the loop1 == 0 pairs run */
1932 		while(loop1 < 2) {
			loop2 = 0;
1933 			while(loop2 < 2) {
1934 				/* allocate UC bo1 for sDMA use */
1935 				r = amdgpu_bo_alloc_and_map(device_handle,
1936 							    sdma_write_length, 4096,
1937 							    AMDGPU_GEM_DOMAIN_GTT,
1938 							    gtt_flags[loop1], &bo1,
1939 							    (void**)&bo1_cpu, &bo1_mc,
1940 							    &bo1_va_handle);
1941 				CU_ASSERT_EQUAL(r, 0);
1942 
1943 				/* set bo1 */
1944 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1945 
1946 				/* allocate UC bo2 for sDMA use */
1947 				r = amdgpu_bo_alloc_and_map(device_handle,
1948 							    sdma_write_length, 4096,
1949 							    AMDGPU_GEM_DOMAIN_GTT,
1950 							    gtt_flags[loop2], &bo2,
1951 							    (void**)&bo2_cpu, &bo2_mc,
1952 							    &bo2_va_handle);
1953 				CU_ASSERT_EQUAL(r, 0);
1954 
1955 				/* clear bo2 */
1956 				memset((void*)bo2_cpu, 0, sdma_write_length);
1957 
1958 				resources[0] = bo1;
1959 				resources[1] = bo2;
1960 
1961 				/* fill PM4: test DMA copy linear */
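				/* SDMA copy-linear layout: header, byte count (length - 1 on
				 * AI and newer), then src address lo/hi followed by dst address
				 * lo/hi; the SI variant packs the count into the header and
				 * places the dst address first */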
1962 				i = j = 0;
1963 				if (ip_type == AMDGPU_HW_IP_DMA) {
1964 					if (family_id == AMDGPU_FAMILY_SI) {
1965 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1966 									  0, 0, 0,
1967 									  sdma_write_length);
1968 						pm4[i++] = 0xffffffff & bo2_mc;
1969 						pm4[i++] = 0xffffffff & bo1_mc;
1970 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1971 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1972 					} else {
1973 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1974 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1975 								       0);
1976 						if (family_id >= AMDGPU_FAMILY_AI)
1977 							pm4[i++] = sdma_write_length - 1;
1978 						else
1979 							pm4[i++] = sdma_write_length;
1980 						pm4[i++] = 0;
1981 						pm4[i++] = 0xffffffff & bo1_mc;
1982 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1983 						pm4[i++] = 0xffffffff & bo2_mc;
1984 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985 					}
1986 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1987 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1988 					if (family_id == AMDGPU_FAMILY_SI) {
1989 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1990 						pm4[i++] = 0xfffffffc & bo1_mc;
1991 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1992 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1993 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1994 							   PACKET3_DMA_DATA_SI_CP_SYNC |
1995 							   (0xffff00000000 & bo1_mc) >> 32;
1996 						pm4[i++] = 0xfffffffc & bo2_mc;
1997 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1998 						pm4[i++] = sdma_write_length;
1999 					} else {
2000 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2001 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2002 							   PACKET3_DMA_DATA_DST_SEL(0) |
2003 							   PACKET3_DMA_DATA_SRC_SEL(0) |
2004 							   PACKET3_DMA_DATA_CP_SYNC;
2005 						pm4[i++] = 0xfffffffc & bo1_mc;
2006 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2007 						pm4[i++] = 0xfffffffc & bo2_mc;
2008 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2009 						pm4[i++] = sdma_write_length;
2010 					}
2011 				}
2012 
2013 				amdgpu_test_exec_cs_helper(context_handle,
2014 							   ip_type, ring_id,
2015 							   i, pm4,
2016 							   2, resources,
2017 							   ib_info, ibs_request);
2018 
2019 				/* verify that the copied data matches the source pattern */
2020 				i = 0;
2021 				while(i < sdma_write_length) {
2022 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2023 				}
2024 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2025 							     sdma_write_length);
2026 				CU_ASSERT_EQUAL(r, 0);
2027 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2028 							     sdma_write_length);
2029 				CU_ASSERT_EQUAL(r, 0);
2030 				loop2++;
2031 			}
2032 			loop1++;
2033 		}
2034 	}
2035 	/* clean resources */
2036 	free(resources);
2037 	free(ibs_request);
2038 	free(ib_info);
2039 	free(pm4);
2040 
2041 	/* end of test */
2042 	r = amdgpu_cs_ctx_free(context_handle);
2043 	CU_ASSERT_EQUAL(r, 0);
2044 }
2045 
2046 static void amdgpu_command_submission_sdma_copy_linear(void)
2047 {
2048 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2049 }
2050 
2051 static void amdgpu_command_submission_sdma(void)
2052 {
2053 	amdgpu_command_submission_sdma_write_linear();
2054 	amdgpu_command_submission_sdma_const_fill();
2055 	amdgpu_command_submission_sdma_copy_linear();
2056 }
2057 
2058 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2059 {
2060 	amdgpu_context_handle context_handle;
2061 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2062 	void *ib_result_cpu, *ib_result_ce_cpu;
2063 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2064 	struct amdgpu_cs_request ibs_request[2] = {0};
2065 	struct amdgpu_cs_ib_info ib_info[2];
2066 	struct amdgpu_cs_fence fence_status[2] = {0};
2067 	uint32_t *ptr;
2068 	uint32_t expired;
2069 	amdgpu_bo_list_handle bo_list;
2070 	amdgpu_va_handle va_handle, va_handle_ce;
2071 	int r;
2072 	int i = 0, ib_cs_num = 2;
2073 
2074 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2075 	CU_ASSERT_EQUAL(r, 0);
2076 
2077 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2078 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2079 				    &ib_result_handle, &ib_result_cpu,
2080 				    &ib_result_mc_address, &va_handle);
2081 	CU_ASSERT_EQUAL(r, 0);
2082 
2083 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2084 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2085 				    &ib_result_ce_handle, &ib_result_ce_cpu,
2086 				    &ib_result_ce_mc_address, &va_handle_ce);
2087 	CU_ASSERT_EQUAL(r, 0);
2088 
2089 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2090 			       ib_result_ce_handle, &bo_list);
2091 	CU_ASSERT_EQUAL(r, 0);
2092 
2093 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2094 
2095 	/* IT_SET_CE_DE_COUNTERS */
2096 	ptr = ib_result_ce_cpu;
2097 	if (family_id != AMDGPU_FAMILY_SI) {
2098 		ptr[i++] = 0xc0008900;
2099 		ptr[i++] = 0;
2100 	}
2101 	ptr[i++] = 0xc0008400;
2102 	ptr[i++] = 1;
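	/* 0xc0008400 is a raw type-3 PM4 header, (3 << 30) | (count << 16) |
	 * (opcode << 8); opcode 0x84 increments the CE counter so the
	 * WAIT_ON_CE_COUNTER in the DE IB below can pass */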
2103 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2104 	ib_info[0].size = i;
2105 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2106 
2107 	/* IT_WAIT_ON_CE_COUNTER */
2108 	ptr = ib_result_cpu;
2109 	ptr[0] = 0xc0008600;
2110 	ptr[1] = 0x00000001;
2111 	ib_info[1].ib_mc_address = ib_result_mc_address;
2112 	ib_info[1].size = 2;
2113 
2114 	for (i = 0; i < ib_cs_num; i++) {
2115 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2116 		ibs_request[i].number_of_ibs = 2;
2117 		ibs_request[i].ibs = ib_info;
2118 		ibs_request[i].resources = bo_list;
2119 		ibs_request[i].fence_info.handle = NULL;
2120 	}
2121 
2122 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
2123 
2124 	CU_ASSERT_EQUAL(r, 0);
2125 
2126 	for (i = 0; i < ib_cs_num; i++) {
2127 		fence_status[i].context = context_handle;
2128 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2129 		fence_status[i].fence = ibs_request[i].seq_no;
2130 	}
2131 
2132 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2133 				AMDGPU_TIMEOUT_INFINITE,
2134 				&expired, NULL);
2135 	CU_ASSERT_EQUAL(r, 0);
2136 
2137 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2138 				     ib_result_mc_address, 4096);
2139 	CU_ASSERT_EQUAL(r, 0);
2140 
2141 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2142 				     ib_result_ce_mc_address, 4096);
2143 	CU_ASSERT_EQUAL(r, 0);
2144 
2145 	r = amdgpu_bo_list_destroy(bo_list);
2146 	CU_ASSERT_EQUAL(r, 0);
2147 
2148 	r = amdgpu_cs_ctx_free(context_handle);
2149 	CU_ASSERT_EQUAL(r, 0);
2150 }
2151 
2152 static void amdgpu_command_submission_multi_fence(void)
2153 {
2154 	amdgpu_command_submission_multi_fence_wait_all(true);
2155 	amdgpu_command_submission_multi_fence_wait_all(false);
2156 }
2157 
2158 static void amdgpu_userptr_test(void)
2159 {
2160 	int i, r, j;
2161 	uint32_t *pm4 = NULL;
2162 	uint64_t bo_mc;
2163 	void *ptr = NULL;
2164 	int pm4_dw = 256;
2165 	int sdma_write_length = 4;
2166 	amdgpu_bo_handle handle;
2167 	amdgpu_context_handle context_handle;
2168 	struct amdgpu_cs_ib_info *ib_info;
2169 	struct amdgpu_cs_request *ibs_request;
2170 	amdgpu_bo_handle buf_handle;
2171 	amdgpu_va_handle va_handle;
2172 
2173 	pm4 = calloc(pm4_dw, sizeof(*pm4));
2174 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2175 
2176 	ib_info = calloc(1, sizeof(*ib_info));
2177 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2178 
2179 	ibs_request = calloc(1, sizeof(*ibs_request));
2180 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2181 
2182 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2183 	CU_ASSERT_EQUAL(r, 0);
2184 
2185 	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2186 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2187 	memset(ptr, 0, BUFFER_SIZE);
2188 
2189 	r = amdgpu_create_bo_from_user_mem(device_handle,
2190 					   ptr, BUFFER_SIZE, &buf_handle);
2191 	CU_ASSERT_EQUAL(r, 0);
2192 
2193 	r = amdgpu_va_range_alloc(device_handle,
2194 				  amdgpu_gpu_va_range_general,
2195 				  BUFFER_SIZE, 1, 0, &bo_mc,
2196 				  &va_handle, 0);
2197 	CU_ASSERT_EQUAL(r, 0);
2198 
2199 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2200 	CU_ASSERT_EQUAL(r, 0);
2201 
2202 	handle = buf_handle;
2203 
2204 	j = i = 0;
2205 
2206 	if (family_id == AMDGPU_FAMILY_SI)
2207 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2208 				sdma_write_length);
2209 	else
2210 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2211 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2212 	pm4[i++] = 0xffffffff & bo_mc;
2213 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2214 	if (family_id >= AMDGPU_FAMILY_AI)
2215 		pm4[i++] = sdma_write_length - 1;
2216 	else if (family_id != AMDGPU_FAMILY_SI)
2217 		pm4[i++] = sdma_write_length;
2218 
2219 	while (j++ < sdma_write_length)
2220 		pm4[i++] = 0xdeadbeaf;
2221 
2222 	if (!fork()) {
2223 		pm4[0] = 0x0;
2224 		exit(0);
2225 	}
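	/* the child's store above lands in its copy-on-write page; the
	 * parent's userptr BO must still hold the original packets */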
2226 
2227 	amdgpu_test_exec_cs_helper(context_handle,
2228 				   AMDGPU_HW_IP_DMA, 0,
2229 				   i, pm4,
2230 				   1, &handle,
2231 				   ib_info, ibs_request);
2232 	i = 0;
2233 	while (i < sdma_write_length) {
2234 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2235 	}
2236 	free(ibs_request);
2237 	free(ib_info);
2238 	free(pm4);
2239 
2240 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2241 	CU_ASSERT_EQUAL(r, 0);
2242 	r = amdgpu_va_range_free(va_handle);
2243 	CU_ASSERT_EQUAL(r, 0);
2244 	r = amdgpu_bo_free(buf_handle);
2245 	CU_ASSERT_EQUAL(r, 0);
2246 	free(ptr);
2247 
2248 	r = amdgpu_cs_ctx_free(context_handle);
2249 	CU_ASSERT_EQUAL(r, 0);
2250 
2251 	wait(NULL);
2252 }
2253 
2254 static void amdgpu_sync_dependency_test(void)
2255 {
2256 	amdgpu_context_handle context_handle[2];
2257 	amdgpu_bo_handle ib_result_handle;
2258 	void *ib_result_cpu;
2259 	uint64_t ib_result_mc_address;
2260 	struct amdgpu_cs_request ibs_request;
2261 	struct amdgpu_cs_ib_info ib_info;
2262 	struct amdgpu_cs_fence fence_status;
2263 	uint32_t expired;
2264 	int i, j, r;
2265 	amdgpu_bo_list_handle bo_list;
2266 	amdgpu_va_handle va_handle;
2267 	static uint32_t *ptr;
2268 	uint64_t seq_no;
2269 
2270 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2271 	CU_ASSERT_EQUAL(r, 0);
2272 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2273 	CU_ASSERT_EQUAL(r, 0);
2274 
2275 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2276 			AMDGPU_GEM_DOMAIN_GTT, 0,
2277 						    &ib_result_handle, &ib_result_cpu,
2278 						    &ib_result_mc_address, &va_handle);
2279 	CU_ASSERT_EQUAL(r, 0);
2280 
2281 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2282 			       &bo_list);
2283 	CU_ASSERT_EQUAL(r, 0);
2284 
2285 	ptr = ib_result_cpu;
2286 	i = 0;
2287 
2288 	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
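	/* one BO holds everything: the PM4 stream at offset 0, the compute
	 * shader at CODE_OFFSET dwords, and the result slot at DATA_OFFSET dwords */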
2289 
2290 	/* Dispatch minimal init config and verify it's executed */
2291 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2292 	ptr[i++] = 0x80000000;
2293 	ptr[i++] = 0x80000000;
2294 
2295 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2296 	ptr[i++] = 0x80000000;
2297 
2298 
2299 	/* Program compute regs */
2300 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2301 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2302 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2303 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2304 
2305 
2306 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2307 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2308 	/*
2309 	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2310 	 *                                       SGPRS = 1
2311 	 *                                       PRIORITY = 0
2312 	 *                                       FLOAT_MODE = 192 (0xc0)
2313 	 *                                       PRIV = 0
2314 	 *                                       DX10_CLAMP = 1
2315 	 *                                       DEBUG_MODE = 0
2316 	 *                                       IEEE_MODE = 0
2317 	 *                                       BULKY = 0
2318 	 *                                       CDBG_USER = 0
2319 	 *
2320 	 */
2321 	ptr[i++] = 0x002c0040;
2322 
2323 
2324 	/*
2325 	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2326 	 *                                       USER_SGPR = 8
2327 	 *                                       TRAP_PRESENT = 0
2328 	 *                                       TGID_X_EN = 0
2329 	 *                                       TGID_Y_EN = 0
2330 	 *                                       TGID_Z_EN = 0
2331 	 *                                       TG_SIZE_EN = 0
2332 	 *                                       TIDIG_COMP_CNT = 0
2333 	 *                                       EXCP_EN_MSB = 0
2334 	 *                                       LDS_SIZE = 0
2335 	 *                                       EXCP_EN = 0
2336 	 *
2337 	 */
2338 	ptr[i++] = 0x00000010;
2339 
2340 
2341 /*
2342  * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2343  *                                        WAVESIZE = 0
2344  *
2345  */
2346 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2347 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2348 	ptr[i++] = 0x00000100;
2349 
2350 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2351 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2352 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2353 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2354 
2355 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2356 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2357 	ptr[i++] = 0;
2358 
2359 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2360 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2361 	ptr[i++] = 1;
2362 	ptr[i++] = 1;
2363 	ptr[i++] = 1;
2364 
2365 
2366 	/* Dispatch */
2367 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2368 	ptr[i++] = 1;
2369 	ptr[i++] = 1;
2370 	ptr[i++] = 1;
2371 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2372 
2373 
2374 	while (i & 7)
2375 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2376 
2377 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2378 	ib_info.ib_mc_address = ib_result_mc_address;
2379 	ib_info.size = i;
2380 
2381 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2382 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2383 	ibs_request.ring = 0;
2384 	ibs_request.number_of_ibs = 1;
2385 	ibs_request.ibs = &ib_info;
2386 	ibs_request.resources = bo_list;
2387 	ibs_request.fence_info.handle = NULL;
2388 
2389 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2390 	CU_ASSERT_EQUAL(r, 0);
2391 	seq_no = ibs_request.seq_no;
2392 
2393 
2394 
2395 	/* Prepare second command with dependency on the first */
2396 	j = i;
2397 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2398 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2399 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2400 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2401 	ptr[i++] = 99;
2402 
2403 	while (i & 7)
2404 		ptr[i++] =  0xffff1000; /* type3 nop packet */
2405 
2406 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2407 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2408 	ib_info.size = i - j;
2409 
2410 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2411 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2412 	ibs_request.ring = 0;
2413 	ibs_request.number_of_ibs = 1;
2414 	ibs_request.ibs = &ib_info;
2415 	ibs_request.resources = bo_list;
2416 	ibs_request.fence_info.handle = NULL;
2417 
2418 	ibs_request.number_of_dependencies = 1;
2419 
2420 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2421 	ibs_request.dependencies[0].context = context_handle[1];
2422 	ibs_request.dependencies[0].ip_instance = 0;
2423 	ibs_request.dependencies[0].ring = 0;
2424 	ibs_request.dependencies[0].fence = seq_no;
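	/* the submission below on context_handle[0] must not start until
	 * fence seq_no on context_handle[1] has signaled */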
2425 
2426 
2427 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2428 	CU_ASSERT_EQUAL(r, 0);
2429 
2430 
2431 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2432 	fence_status.context = context_handle[0];
2433 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2434 	fence_status.ip_instance = 0;
2435 	fence_status.ring = 0;
2436 	fence_status.fence = ibs_request.seq_no;
2437 
2438 	r = amdgpu_cs_query_fence_status(&fence_status,
2439 		       AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2440 	CU_ASSERT_EQUAL(r, 0);
2441 
2442 	/* Expect the second command to wait for shader to complete */
2443 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2444 
2445 	r = amdgpu_bo_list_destroy(bo_list);
2446 	CU_ASSERT_EQUAL(r, 0);
2447 
2448 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2449 				     ib_result_mc_address, 4096);
2450 	CU_ASSERT_EQUAL(r, 0);
2451 
2452 	r = amdgpu_cs_ctx_free(context_handle[0]);
2453 	CU_ASSERT_EQUAL(r, 0);
2454 	r = amdgpu_cs_ctx_free(context_handle[1]);
2455 	CU_ASSERT_EQUAL(r, 0);
2456 
2457 	free(ibs_request.dependencies);
2458 }
2459 
2460 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2461 {
2462 	struct amdgpu_test_shader *shader;
2463 	int i, loop = 0x10000;
2464 
2465 	switch (family) {
2466 		case AMDGPU_FAMILY_AI:
2467 			shader = &memcpy_cs_hang_slow_ai;
2468 			break;
2469 		case AMDGPU_FAMILY_RV:
2470 			shader = &memcpy_cs_hang_slow_rv;
2471 			break;
2472 		case AMDGPU_FAMILY_NV:
2473 			shader = &memcpy_cs_hang_slow_nv;
2474 			break;
2475 		default:
2476 			return -1;
2478 	}
2479 
2480 	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2481 
2482 	for (i = 0; i < loop; i++)
2483 		memcpy(ptr + shader->header_length + shader->body_length * i,
2484 			shader->shader + shader->header_length,
2485 			shader->body_length * sizeof(uint32_t));
2486 
2487 	memcpy(ptr + shader->header_length + shader->body_length * loop,
2488 		shader->shader + shader->header_length + shader->body_length,
2489 		shader->foot_length * sizeof(uint32_t));
2490 
2491 	return 0;
2492 }
2493 
2494 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2495 					   int cs_type,
2496 					   uint32_t version)
2497 {
2498 	uint32_t shader_size = 0;
2499 	const uint32_t *shader = NULL;
2500 
2501 	switch (cs_type) {
2502 		case CS_BUFFERCLEAR:
2503 			if (version == 9) {
2504 				shader = bufferclear_cs_shader_gfx9;
2505 				shader_size = sizeof(bufferclear_cs_shader_gfx9);
2506 			} else if (version == 10) {
2507 				shader = bufferclear_cs_shader_gfx10;
2508 				shader_size = sizeof(bufferclear_cs_shader_gfx10);
2509 			}
2510 			break;
2511 		case CS_BUFFERCOPY:
2512 			if (version == 9) {
2513 				shader = buffercopy_cs_shader_gfx9;
2514 				shader_size = sizeof(buffercopy_cs_shader_gfx9);
2515 			} else if (version == 10) {
2516 				shader = buffercopy_cs_shader_gfx10;
2517 				shader_size = sizeof(buffercopy_cs_shader_gfx10);
2518 			}
2519 			break;
2520 		case CS_HANG:
2521 			shader = memcpy_ps_hang;
2522 			shader_size = sizeof(memcpy_ps_hang);
2523 			break;
2524 		default:
2525 			return -1;
2527 	}
2528 
	if (!shader)
		return -1;
2529 	memcpy(ptr, shader, shader_size);
2530 	return 0;
2531 }
2532 
2533 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version)
2534 {
2535 	int i = 0;
2536 
2537 	/* Write context control and load shadowing register if necessary */
2538 	if (ip_type == AMDGPU_HW_IP_GFX) {
2539 		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2540 		ptr[i++] = 0x80000000;
2541 		ptr[i++] = 0x80000000;
2542 	}
2543 
2544 	/* Issue commands to set default compute state. */
2545 	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2546 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2547 	ptr[i++] = 0x204;
2548 	i += 3;
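	/* the command buffer was zeroed by the caller, so skipping i here
	 * leaves the three start registers cleared */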
2549 
2550 	/* clear mmCOMPUTE_TMPRING_SIZE */
2551 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2552 	ptr[i++] = 0x218;
2553 	ptr[i++] = 0;
2554 
2555 	/* Set new sh registers in GFX10 to 0 */
2556 	if (version == 10) {
2557 		/* mmCOMPUTE_SHADER_CHKSUM */
2558 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2559 		ptr[i++] = 0x22a;
2560 		ptr[i++] = 0;
2561 		/* mmCOMPUTE_REQ_CTRL */
2562 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6);
2563 		ptr[i++] = 0x222;
2564 		i += 6;
2565 		/* mmCP_COHER_START_DELAY */
2566 		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2567 		ptr[i++] = 0x7b;
2568 		ptr[i++] = 0x20;
2569 	}
2570 	return i;
2571 }
2572 
2573 static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version)
2574 {
2575 	int i = 0;
2576 
2577 	/*  Issue commands to set cu mask used in current dispatch */
2578 	if (version == 9) {
2579 		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2580 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2581 		ptr[i++] = 0x216;
2582 		ptr[i++] = 0xffffffff;
2583 		ptr[i++] = 0xffffffff;
2584 		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2585 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2586 		ptr[i++] = 0x219;
2587 		ptr[i++] = 0xffffffff;
2588 		ptr[i++] = 0xffffffff;
2589 	} else if (version == 10) {
2590 		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2591 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2592 		ptr[i++] = 0x30000216;
2593 		ptr[i++] = 0xffffffff;
2594 		ptr[i++] = 0xffffffff;
2595 		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2596 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2597 		ptr[i++] = 0x30000219;
2598 		ptr[i++] = 0xffffffff;
2599 		ptr[i++] = 0xffffffff;
2600 	}
2601 
2602 	return i;
2603 }
2604 
2605 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version)
2606 {
2607 	int i, j;
2608 
2609 	i = 0;
2610 
2611 	/* Writes shader state to HW */
2612 	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2613 	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2614 	ptr[i++] = 0x20c;
2615 	ptr[i++] = (shader_addr >> 8);
2616 	ptr[i++] = (shader_addr >> 40);
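	/* COMPUTE_PGM_LO/HI take the shader VA in 256-byte units, hence the
	 * shifts by 8 and 40 */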
2617 	/* write sh regs */
2618 	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2619 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2620 		/* - Gfx9ShRegBase */
2621 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2622 		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2623 	}
2624 
2625 	if (version == 10) {
2626 		/* mmCOMPUTE_PGM_RSRC3 */
2627 		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2628 		ptr[i++] = 0x228;
2629 		ptr[i++] = 0;
2630 	}
2631 
2632 	return i;
2633 }
2634 
2635 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2636 					 uint32_t ip_type,
2637 					 uint32_t ring,
2638 					 uint32_t version)
2639 {
2640 	amdgpu_context_handle context_handle;
2641 	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2642 	volatile unsigned char *ptr_dst;
2643 	void *ptr_shader;
2644 	uint32_t *ptr_cmd;
2645 	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2646 	amdgpu_va_handle va_dst, va_shader, va_cmd;
2647 	int i, r;
2648 	int bo_dst_size = 16384;
2649 	int bo_shader_size = 4096;
2650 	int bo_cmd_size = 4096;
2651 	struct amdgpu_cs_request ibs_request = {0};
2652 	struct amdgpu_cs_ib_info ib_info= {0};
2653 	amdgpu_bo_list_handle bo_list;
2654 	struct amdgpu_cs_fence fence_status = {0};
2655 	uint32_t expired;
2656 
2657 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2658 	CU_ASSERT_EQUAL(r, 0);
2659 
2660 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2661 					AMDGPU_GEM_DOMAIN_GTT, 0,
2662 					&bo_cmd, (void **)&ptr_cmd,
2663 					&mc_address_cmd, &va_cmd);
2664 	CU_ASSERT_EQUAL(r, 0);
2665 	memset(ptr_cmd, 0, bo_cmd_size);
2666 
2667 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2668 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2669 					&bo_shader, &ptr_shader,
2670 					&mc_address_shader, &va_shader);
2671 	CU_ASSERT_EQUAL(r, 0);
2672 	memset(ptr_shader, 0, bo_shader_size);
2673 
2674 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version);
2675 	CU_ASSERT_EQUAL(r, 0);
2676 
2677 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2678 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2679 					&bo_dst, (void **)&ptr_dst,
2680 					&mc_address_dst, &va_dst);
2681 	CU_ASSERT_EQUAL(r, 0);
2682 
2683 	i = 0;
2684 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2685 
2686 	/*  Issue commands to set cu mask used in current dispatch */
2687 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2688 
2689 	/* Writes shader state to HW */
2690 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2691 
2692 	/* Write constant data */
2693 	/* Writes the UAV constant data to the SGPRs. */
2694 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2695 	ptr_cmd[i++] = 0x240;
2696 	ptr_cmd[i++] = mc_address_dst;
2697 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2698 	ptr_cmd[i++] = 0x400;
2699 	if (version == 9)
2700 		ptr_cmd[i++] = 0x74fac;
2701 	else if (version == 10)
2702 		ptr_cmd[i++] = 0x1104bfac;
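	/* the four dwords above describe the UAV buffer: base address lo/hi
	 * (stride bits folded into the high dword), 0x400 records, and a
	 * gfx-version-specific format dword */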
2703 
2704 	/* Sets a range of pixel shader constants */
2705 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2706 	ptr_cmd[i++] = 0x244;
2707 	ptr_cmd[i++] = 0x22222222;
2708 	ptr_cmd[i++] = 0x22222222;
2709 	ptr_cmd[i++] = 0x22222222;
2710 	ptr_cmd[i++] = 0x22222222;
2711 
2712 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2713 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2714 	ptr_cmd[i++] = 0x215;
2715 	ptr_cmd[i++] = 0;
2716 
2717 	/* dispatch direct command */
2718 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2719 	ptr_cmd[i++] = 0x10;
2720 	ptr_cmd[i++] = 1;
2721 	ptr_cmd[i++] = 1;
2722 	ptr_cmd[i++] = 1;
2723 
2724 	while (i & 7)
2725 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2726 
2727 	resources[0] = bo_dst;
2728 	resources[1] = bo_shader;
2729 	resources[2] = bo_cmd;
2730 	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2731 	CU_ASSERT_EQUAL(r, 0);
2732 
2733 	ib_info.ib_mc_address = mc_address_cmd;
2734 	ib_info.size = i;
2735 	ibs_request.ip_type = ip_type;
2736 	ibs_request.ring = ring;
2737 	ibs_request.resources = bo_list;
2738 	ibs_request.number_of_ibs = 1;
2739 	ibs_request.ibs = &ib_info;
2740 	ibs_request.fence_info.handle = NULL;
2741 
2742 	/* submit CS */
2743 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2744 	CU_ASSERT_EQUAL(r, 0);
2745 
2746 	r = amdgpu_bo_list_destroy(bo_list);
2747 	CU_ASSERT_EQUAL(r, 0);
2748 
2749 	fence_status.ip_type = ip_type;
2750 	fence_status.ip_instance = 0;
2751 	fence_status.ring = ring;
2752 	fence_status.context = context_handle;
2753 	fence_status.fence = ibs_request.seq_no;
2754 
2755 	/* wait for the IB to complete */
2756 	r = amdgpu_cs_query_fence_status(&fence_status,
2757 					 AMDGPU_TIMEOUT_INFINITE,
2758 					 0, &expired);
2759 	CU_ASSERT_EQUAL(r, 0);
2760 	CU_ASSERT_EQUAL(expired, true);
2761 
2762 	/* verify that the memset result matches the expected pattern */
2763 	i = 0;
2764 	while(i < bo_dst_size) {
2765 		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2766 	}
2767 
2768 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2769 	CU_ASSERT_EQUAL(r, 0);
2770 
2771 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2772 	CU_ASSERT_EQUAL(r, 0);
2773 
2774 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2775 	CU_ASSERT_EQUAL(r, 0);
2776 
2777 	r = amdgpu_cs_ctx_free(context_handle);
2778 	CU_ASSERT_EQUAL(r, 0);
2779 }
2780 
2781 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2782 					uint32_t ip_type,
2783 					uint32_t ring,
2784 					uint32_t version,
2785 					int hang)
2786 {
2787 	amdgpu_context_handle context_handle;
2788 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2789 	volatile unsigned char *ptr_dst;
2790 	void *ptr_shader;
2791 	unsigned char *ptr_src;
2792 	uint32_t *ptr_cmd;
2793 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2794 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2795 	int i, r;
2796 	int bo_dst_size = 16384;
2797 	int bo_shader_size = 4096;
2798 	int bo_cmd_size = 4096;
2799 	struct amdgpu_cs_request ibs_request = {0};
2800 	struct amdgpu_cs_ib_info ib_info= {0};
2801 	uint32_t expired, hang_state, hangs;
2802 	enum cs_type cs_type;
2803 	amdgpu_bo_list_handle bo_list;
2804 	struct amdgpu_cs_fence fence_status = {0};
2805 
2806 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2807 	CU_ASSERT_EQUAL(r, 0);
2808 
2809 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2810 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2811 				    &bo_cmd, (void **)&ptr_cmd,
2812 				    &mc_address_cmd, &va_cmd);
2813 	CU_ASSERT_EQUAL(r, 0);
2814 	memset(ptr_cmd, 0, bo_cmd_size);
2815 
2816 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2817 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2818 					&bo_shader, &ptr_shader,
2819 					&mc_address_shader, &va_shader);
2820 	CU_ASSERT_EQUAL(r, 0);
2821 	memset(ptr_shader, 0, bo_shader_size);
2822 
2823 	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2824 	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
2825 	CU_ASSERT_EQUAL(r, 0);
2826 
2827 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2828 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2829 					&bo_src, (void **)&ptr_src,
2830 					&mc_address_src, &va_src);
2831 	CU_ASSERT_EQUAL(r, 0);
2832 
2833 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2834 					AMDGPU_GEM_DOMAIN_VRAM, 0,
2835 					&bo_dst, (void **)&ptr_dst,
2836 					&mc_address_dst, &va_dst);
2837 	CU_ASSERT_EQUAL(r, 0);
2838 
2839 	memset(ptr_src, 0x55, bo_dst_size);
2840 
2841 	i = 0;
2842 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2843 
2844 	/*  Issue commands to set cu mask used in current dispatch */
2845 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2846 
2847 	/* Writes shader state to HW */
2848 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2849 
2850 	/* Write constant data */
2851 	/* Writes the texture resource constants data to the SGPRs */
2852 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2853 	ptr_cmd[i++] = 0x240;
2854 	ptr_cmd[i++] = mc_address_src;
2855 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2856 	ptr_cmd[i++] = 0x400;
2857 	if (version == 9)
2858 		ptr_cmd[i++] = 0x74fac;
2859 	else if (version == 10)
2860 		ptr_cmd[i++] = 0x1104bfac;
2861 
2862 	/* Writes the UAV constant data to the SGPRs. */
2863 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2864 	ptr_cmd[i++] = 0x244;
2865 	ptr_cmd[i++] = mc_address_dst;
2866 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2867 	ptr_cmd[i++] = 0x400;
2868 	if (version == 9)
2869 		ptr_cmd[i++] = 0x74fac;
2870 	else if (version == 10)
2871 		ptr_cmd[i++] = 0x1104bfac;
2872 
2873 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2874 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2875 	ptr_cmd[i++] = 0x215;
2876 	ptr_cmd[i++] = 0;
2877 
2878 	/* dispatch direct command */
2879 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2880 	ptr_cmd[i++] = 0x10;
2881 	ptr_cmd[i++] = 1;
2882 	ptr_cmd[i++] = 1;
2883 	ptr_cmd[i++] = 1;
2884 
2885 	while (i & 7)
2886 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2887 
2888 	resources[0] = bo_shader;
2889 	resources[1] = bo_src;
2890 	resources[2] = bo_dst;
2891 	resources[3] = bo_cmd;
2892 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2893 	CU_ASSERT_EQUAL(r, 0);
2894 
2895 	ib_info.ib_mc_address = mc_address_cmd;
2896 	ib_info.size = i;
2897 	ibs_request.ip_type = ip_type;
2898 	ibs_request.ring = ring;
2899 	ibs_request.resources = bo_list;
2900 	ibs_request.number_of_ibs = 1;
2901 	ibs_request.ibs = &ib_info;
2902 	ibs_request.fence_info.handle = NULL;
2903 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2904 	CU_ASSERT_EQUAL(r, 0);
2905 
2906 	fence_status.ip_type = ip_type;
2907 	fence_status.ip_instance = 0;
2908 	fence_status.ring = ring;
2909 	fence_status.context = context_handle;
2910 	fence_status.fence = ibs_request.seq_no;
2911 
2912 	/* wait for the IB to complete */
2913 	r = amdgpu_cs_query_fence_status(&fence_status,
2914 					 AMDGPU_TIMEOUT_INFINITE,
2915 					 0, &expired);
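	/* when a hang was injected, the fence may signal only after the GPU
	 * recovers; the reset state queried below is what the test checks */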
2916 
2917 	if (!hang) {
2918 		CU_ASSERT_EQUAL(r, 0);
2919 		CU_ASSERT_EQUAL(expired, true);
2920 
2921 		/* verify that the memcpy result matches the source */
2922 		i = 0;
2923 		while(i < bo_dst_size) {
2924 			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2925 			i++;
2926 		}
2927 	} else {
2928 		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2929 		CU_ASSERT_EQUAL(r, 0);
2930 		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2931 	}
2932 
2933 	r = amdgpu_bo_list_destroy(bo_list);
2934 	CU_ASSERT_EQUAL(r, 0);
2935 
2936 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2937 	CU_ASSERT_EQUAL(r, 0);
2938 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2939 	CU_ASSERT_EQUAL(r, 0);
2940 
2941 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2942 	CU_ASSERT_EQUAL(r, 0);
2943 
2944 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2945 	CU_ASSERT_EQUAL(r, 0);
2946 
2947 	r = amdgpu_cs_ctx_free(context_handle);
2948 	CU_ASSERT_EQUAL(r, 0);
2949 }
2950 
2951 static void amdgpu_compute_dispatch_test(void)
2952 {
2953 	int r;
2954 	struct drm_amdgpu_info_hw_ip info;
2955 	uint32_t ring_id, version;
2956 
2957 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2958 	CU_ASSERT_EQUAL(r, 0);
2959 	if (!info.available_rings)
2960 		printf("SKIP ... as there's no compute ring\n");
2961 
2962 	version = info.hw_ip_version_major;
2963 	if (version != 9 && version != 10) {
2964 		printf("SKIP ... unsupported gfx version %d\n", version);
2965 		return;
2966 	}
2967 
2968 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2969 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version);
2970 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0);
2971 	}
2972 }
2973 
2974 static void amdgpu_gfx_dispatch_test(void)
2975 {
2976 	int r;
2977 	struct drm_amdgpu_info_hw_ip info;
2978 	uint32_t ring_id, version;
2979 
2980 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2981 	CU_ASSERT_EQUAL(r, 0);
2982 	if (!info.available_rings)
2983 		printf("SKIP ... as there's no graphics ring\n");
2984 
2985 	version = info.hw_ip_version_major;
2986 	if (version != 9 && version != 10) {
2987 		printf("SKIP ... unsupported gfx version %d\n", version);
2988 		return;
2989 	}
2990 
2991 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2992 		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version);
2993 		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0);
2994 	}
2995 }
2996 
2997 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2998 {
2999 	int r;
3000 	struct drm_amdgpu_info_hw_ip info;
3001 	uint32_t ring_id, version;
3002 
3003 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
3004 	CU_ASSERT_EQUAL(r, 0);
3005 	if (!info.available_rings)
3006 		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
3007 
3008 	version = info.hw_ip_version_major;
3009 	if (version != 9 && version != 10) {
3010 		printf("SKIP ... unsupported gfx version %d\n", version);
3011 		return;
3012 	}
3013 
3014 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3015 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3016 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1);
3017 		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3018 	}
3019 }
3020 
3021 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
3022 						  uint32_t ip_type, uint32_t ring, int version)
3023 {
3024 	amdgpu_context_handle context_handle;
3025 	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
3026 	volatile unsigned char *ptr_dst;
3027 	void *ptr_shader;
3028 	unsigned char *ptr_src;
3029 	uint32_t *ptr_cmd;
3030 	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
3031 	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
3032 	int i, r;
3033 	int bo_dst_size = 0x4000000;
3034 	int bo_shader_size = 0x400000;
3035 	int bo_cmd_size = 4096;
3036 	struct amdgpu_cs_request ibs_request = {0};
3037 	struct amdgpu_cs_ib_info ib_info= {0};
3038 	uint32_t hang_state, hangs, expired;
3039 	struct amdgpu_gpu_info gpu_info = {0};
3040 	amdgpu_bo_list_handle bo_list;
3041 	struct amdgpu_cs_fence fence_status = {0};
3042 
3043 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3044 	CU_ASSERT_EQUAL(r, 0);
3045 
3046 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3047 	CU_ASSERT_EQUAL(r, 0);
3048 
3049 	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3050 				    AMDGPU_GEM_DOMAIN_GTT, 0,
3051 				    &bo_cmd, (void **)&ptr_cmd,
3052 				    &mc_address_cmd, &va_cmd);
3053 	CU_ASSERT_EQUAL(r, 0);
3054 	memset(ptr_cmd, 0, bo_cmd_size);
3055 
3056 	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3057 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3058 					&bo_shader, &ptr_shader,
3059 					&mc_address_shader, &va_shader);
3060 	CU_ASSERT_EQUAL(r, 0);
3061 	memset(ptr_shader, 0, bo_shader_size);
3062 
3063 	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
3064 	CU_ASSERT_EQUAL(r, 0);
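	/* the slow shader repeats its copy body 0x10000 times so the dispatch
	 * outlives the job timeout and forces a GPU reset */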
3065 
3066 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3067 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3068 					&bo_src, (void **)&ptr_src,
3069 					&mc_address_src, &va_src);
3070 	CU_ASSERT_EQUAL(r, 0);
3071 
3072 	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3073 					AMDGPU_GEM_DOMAIN_VRAM, 0,
3074 					&bo_dst, (void **)&ptr_dst,
3075 					&mc_address_dst, &va_dst);
3076 	CU_ASSERT_EQUAL(r, 0);
3077 
3078 	memset(ptr_src, 0x55, bo_dst_size);
3079 
3080 	i = 0;
3081 	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
3082 
3083 	/*  Issue commands to set cu mask used in current dispatch */
3084 	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
3085 
3086 	/* Writes shader state to HW */
3087 	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
3088 
3089 	/* Write constant data */
3090 	/* Writes the texture resource constants data to the SGPRs */
3091 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3092 	ptr_cmd[i++] = 0x240;
3093 	ptr_cmd[i++] = mc_address_src;
3094 	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
3095 	ptr_cmd[i++] = 0x400000;
3096 	if (version == 9)
3097 		ptr_cmd[i++] = 0x74fac;
3098 	else if (version == 10)
3099 		ptr_cmd[i++] = 0x1104bfac;
3100 
3101 	/* Writes the UAV constant data to the SGPRs. */
3102 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3103 	ptr_cmd[i++] = 0x244;
3104 	ptr_cmd[i++] = mc_address_dst;
3105 	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
3106 	ptr_cmd[i++] = 0x400000;
3107 	if (version == 9)
3108 		ptr_cmd[i++] = 0x74fac;
3109 	else if (version == 10)
3110 		ptr_cmd[i++] = 0x1104bfac;
3111 
3112 	/* clear mmCOMPUTE_RESOURCE_LIMITS */
3113 	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
3114 	ptr_cmd[i++] = 0x215;
3115 	ptr_cmd[i++] = 0;
3116 
3117 	/* dispatch direct command */
3118 	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
3119 	ptr_cmd[i++] = 0x10000;
3120 	ptr_cmd[i++] = 1;
3121 	ptr_cmd[i++] = 1;
3122 	ptr_cmd[i++] = 1;
3123 
3124 	while (i & 7)
3125 		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3126 
3127 	resources[0] = bo_shader;
3128 	resources[1] = bo_src;
3129 	resources[2] = bo_dst;
3130 	resources[3] = bo_cmd;
3131 	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3132 	CU_ASSERT_EQUAL(r, 0);
3133 
3134 	ib_info.ib_mc_address = mc_address_cmd;
3135 	ib_info.size = i;
3136 	ibs_request.ip_type = ip_type;
3137 	ibs_request.ring = ring;
3138 	ibs_request.resources = bo_list;
3139 	ibs_request.number_of_ibs = 1;
3140 	ibs_request.ibs = &ib_info;
3141 	ibs_request.fence_info.handle = NULL;
3142 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3143 	CU_ASSERT_EQUAL(r, 0);
3144 
3145 	fence_status.ip_type = ip_type;
3146 	fence_status.ip_instance = 0;
3147 	fence_status.ring = ring;
3148 	fence_status.context = context_handle;
3149 	fence_status.fence = ibs_request.seq_no;
3150 
3151 	/* wait for IB accomplished */
3152 	r = amdgpu_cs_query_fence_status(&fence_status,
3153 					 AMDGPU_TIMEOUT_INFINITE,
3154 					 0, &expired);
3155 
3156 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3157 	CU_ASSERT_EQUAL(r, 0);
3158 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3159 
3160 	r = amdgpu_bo_list_destroy(bo_list);
3161 	CU_ASSERT_EQUAL(r, 0);
3162 
3163 	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
3164 	CU_ASSERT_EQUAL(r, 0);
3165 	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3166 	CU_ASSERT_EQUAL(r, 0);
3167 
3168 	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3169 	CU_ASSERT_EQUAL(r, 0);
3170 
3171 	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
3172 	CU_ASSERT_EQUAL(r, 0);
3173 
3174 	r = amdgpu_cs_ctx_free(context_handle);
3175 	CU_ASSERT_EQUAL(r, 0);
3176 }
3177 
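/*
 * Run the slow-hang dispatch test on every available ring of the given IP,
 * bracketed by good memcpy dispatches to confirm the engine recovers.
 */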
void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id, version;

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
		return;
	}

	version = info.hw_ip_version_major;
	if (version != 9 && version != 10) {
		printf("SKIP ... unsupported gfx version %d\n", version);
		return;
	}

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version);
		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
	}
}

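/*
 * Build an artificially long pixel shader by repeating the body of the
 * slow memcpy PS 0x40000 times between its header and footer, so the draw
 * takes long enough to trigger the GPU timeout.
 */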
static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
{
	struct amdgpu_test_shader *shader;
	int i, loop = 0x40000;

	switch (family) {
		case AMDGPU_FAMILY_AI:
		case AMDGPU_FAMILY_RV:
			shader = &memcpy_ps_hang_slow_ai;
			break;
		default:
			return -1;
	}

	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));

	for (i = 0; i < loop; i++)
		memcpy(ptr + shader->header_length + shader->body_length * i,
			shader->shader + shader->header_length,
			shader->body_length * sizeof(uint32_t));

	memcpy(ptr + shader->header_length + shader->body_length * loop,
		shader->shader + shader->header_length + shader->body_length,
		shader->foot_length * sizeof(uint32_t));

	return 0;
}

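/*
 * Copy the requested pixel shader (constant-color, texture-sampling, or
 * hanging variant) into GPU memory. For PS_CONST and PS_TEX the shader is
 * replicated ten times at 256-byte offsets and each copy is patched with
 * per-instance code from the patch-info tables. Callers are expected to
 * pass only gfx version 9 or 10.
 */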
static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version)
{
	int i;
	uint32_t shader_offset = 256;
	uint32_t mem_offset, patch_code_offset;
	uint32_t shader_size, patchinfo_code_size;
	const uint32_t *shader;
	const uint32_t *patchinfo_code;
	const uint32_t *patchcode_offset;

	switch (ps_type) {
		case PS_CONST:
			if (version == 9) {
				shader = ps_const_shader_gfx9;
				shader_size = sizeof(ps_const_shader_gfx9);
				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
				patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
			} else if (version == 10) {
				shader = ps_const_shader_gfx10;
				shader_size = sizeof(ps_const_shader_gfx10);
				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10;
				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10;
				patchcode_offset = ps_const_shader_patchinfo_offset_gfx10;
			}
			break;
		case PS_TEX:
			if (version == 9) {
				shader = ps_tex_shader_gfx9;
				shader_size = sizeof(ps_tex_shader_gfx9);
				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
			} else if (version == 10) {
				shader = ps_tex_shader_gfx10;
				shader_size = sizeof(ps_tex_shader_gfx10);
				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10;
				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10;
				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10;
			}
			break;
		case PS_HANG:
			shader = memcpy_ps_hang;
			shader_size = sizeof(memcpy_ps_hang);

			memcpy(ptr, shader, shader_size);
			return 0;
		default:
			return -1;
	}

	/* write main shader program */
	for (i = 0; i < 10; i++) {
		mem_offset = i * shader_offset;
		memcpy(ptr + mem_offset, shader, shader_size);
	}

	/* overwrite patch codes */
	for (i = 0; i < 10; i++) {
		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
		patch_code_offset = i * patchinfo_code_size;
		memcpy(ptr + mem_offset,
			patchinfo_code + patch_code_offset,
			patchinfo_code_size * sizeof(uint32_t));
	}

	return 0;
}

/* load RectPosTexFast_VS */
static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version)
{
	const uint32_t *shader;
	uint32_t shader_size;

	if (version == 9) {
		shader = vs_RectPosTexFast_shader_gfx9;
		shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
	} else if (version == 10) {
		shader = vs_RectPosTexFast_shader_gfx10;
		shader_size = sizeof(vs_RectPosTexFast_shader_gfx10);
	} else {
		/* guard against an unsupported gfx version so the memcpy
		 * below never reads an uninitialized pointer */
		return -1;
	}

	memcpy(ptr, shader, shader_size);

	return 0;
}

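/*
 * Emit the draw-engine preamble: a CONTEXT_CONTROL packet followed by the
 * per-generation preamble cache. Callers guarantee version is 9 or 10.
 * Returns the number of dwords written.
 */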
static int amdgpu_draw_init(uint32_t *ptr, uint32_t version)
{
	int i = 0;
	const uint32_t *preamblecache_ptr;
	uint32_t preamblecache_size;

	/* Write context control and load shadowing register if necessary */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	if (version == 9) {
		preamblecache_ptr = preamblecache_gfx9;
		preamblecache_size = sizeof(preamblecache_gfx9);
	} else if (version == 10) {
		preamblecache_ptr = preamblecache_gfx10;
		preamblecache_size = sizeof(preamblecache_gfx10);
	}

	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
	return i + preamblecache_size / sizeof(uint32_t);
}

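/*
 * Program the color and depth buffer state for the draw: the CB_COLOR0_*
 * block for the destination surface (with larger pitch/slice values when
 * hang_slow is set), empty CB_COLOR1 state, SPI_SHADER_COL_FORMAT, and
 * DB_Z_INFO. Returns the number of dwords written.
 */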
static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
							 uint64_t dst_addr,
							 uint32_t version,
							 int hang_slow)
{
	int i = 0;

	/* setup color buffer */
	if (version == 9) {
		/* offset   reg
		   0xA318   CB_COLOR0_BASE
		   0xA319   CB_COLOR0_BASE_EXT
		   0xA31A   CB_COLOR0_ATTRIB2
		   0xA31B   CB_COLOR0_VIEW
		   0xA31C   CB_COLOR0_INFO
		   0xA31D   CB_COLOR0_ATTRIB
		   0xA31E   CB_COLOR0_DCC_CONTROL
		   0xA31F   CB_COLOR0_CMASK
		   0xA320   CB_COLOR0_CMASK_BASE_EXT
		   0xA321   CB_COLOR0_FMASK
		   0xA322   CB_COLOR0_FMASK_BASE_EXT
		   0xA323   CB_COLOR0_CLEAR_WORD0
		   0xA324   CB_COLOR0_CLEAR_WORD1
		   0xA325   CB_COLOR0_DCC_BASE
		   0xA326   CB_COLOR0_DCC_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
		ptr[i++] = 0x318;
		ptr[i++] = dst_addr >> 8;
		ptr[i++] = dst_addr >> 40;
		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
		ptr[i++] = 0;
		ptr[i++] = 0x50438;
		ptr[i++] = 0x10140000;
		i += 9;

		/* mmCB_MRT0_EPITCH */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x1e8;
		ptr[i++] = hang_slow ? 0xfff : 0x1f;
	} else if (version == 10) {
		/* 0xA318   CB_COLOR0_BASE
		   0xA319   CB_COLOR0_PITCH
		   0xA31A   CB_COLOR0_SLICE
		   0xA31B   CB_COLOR0_VIEW
		   0xA31C   CB_COLOR0_INFO
		   0xA31D   CB_COLOR0_ATTRIB
		   0xA31E   CB_COLOR0_DCC_CONTROL
		   0xA31F   CB_COLOR0_CMASK
		   0xA320   CB_COLOR0_CMASK_SLICE
		   0xA321   CB_COLOR0_FMASK
		   0xA322   CB_COLOR0_FMASK_SLICE
		   0xA323   CB_COLOR0_CLEAR_WORD0
		   0xA324   CB_COLOR0_CLEAR_WORD1
		   0xA325   CB_COLOR0_DCC_BASE */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14);
		ptr[i++] = 0x318;
		ptr[i++] = dst_addr >> 8;
		i += 3;
		ptr[i++] = 0x50438;
		i += 9;

		/* 0xA390   CB_COLOR0_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x390;
		ptr[i++] = dst_addr >> 40;

		/* 0xA398   CB_COLOR0_CMASK_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x398;
		ptr[i++] = 0;

		/* 0xA3A0   CB_COLOR0_FMASK_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3a0;
		ptr[i++] = 0;

		/* 0xA3A8   CB_COLOR0_DCC_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3a8;
		ptr[i++] = 0;

		/* 0xA3B0   CB_COLOR0_ATTRIB2 */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3b0;
		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;

		/* 0xA3B8   CB_COLOR0_ATTRIB3 */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3b8;
		ptr[i++] = 0x9014000;
	}

	/* 0xA32B   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR1_BASE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;

	/* Setup depth buffer */
	if (version == 9) {
		/* mmDB_Z_INFO */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
		ptr[i++] = 0xe;
		i += 2;
	} else if (version == 10) {
		/* mmDB_Z_INFO */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
		ptr[i++] = 0x10;
		i += 2;
	}

	return i;
}

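/*
 * Write the remaining draw state: scan converter and AA sample registers,
 * then the per-generation cached command stream (one dword of it is patched
 * when hang_slow is set), and on gfx10 the RMI cache controls.
 * Returns the number of dwords written.
 */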
static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr,
						     uint32_t version,
						     int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	if (version == 9)
		ptr[i++] = 1;
	else if (version == 10)
		ptr[i++] = 0;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	if (version == 9) {
		cached_cmd_ptr = cached_cmd_gfx9;
		cached_cmd_size = sizeof(cached_cmd_gfx9);
	} else if (version == 10) {
		cached_cmd_ptr = cached_cmd_gfx10;
		cached_cmd_size = sizeof(cached_cmd_gfx10);
	}

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
	if (hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size / sizeof(uint32_t);

	if (version == 10) {
		/* mmCB_RMI_GL2_CACHE_CONTROL */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x104;
		ptr[i++] = 0x40aa0055;
		/* mmDB_RMI_L2_CACHE_CONTROL */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x1f;
		ptr[i++] = 0x2a0055;
	}

	return i;
}

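/*
 * Load the RectPosTexFast vertex shader state: program address and RSRC
 * registers, SPI output configuration, and the vertex constants (scaled up
 * for the slow-hang variant; texture coordinates only for PS_TEX).
 * Returns the number of dwords written.
 */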
static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  uint32_t version,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	if (version == 9) {
		/* mmSPI_SHADER_PGM_RSRC3_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = 0x46;
		ptr[i++] = 0xffff;
	} else if (version == 10) {
		/* mmSPI_SHADER_PGM_RSRC3_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000046;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000041;
		ptr[i++] = 0xffff;
	}

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	if (version == 9)
		ptr[i++] = 0xc0081;
	else if (version == 10)
		ptr[i++] = 0xc0041;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;
		ptr[i++] = 0x3f800000;
	}

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}

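/*
 * Load the pixel shader state: the program address (offset by 9 * 256 bytes
 * to select the shader copy matching SPI_SHADER_COL_FORMAT), the RSRC
 * registers, and the SH and context register values from the shader's
 * register tables. Returns the number of dwords written.
 */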
static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
				   int ps_type,
				   uint64_t shader_addr,
				   uint32_t version)
{
	int i, j;
	const uint32_t *sh_registers;
	const uint32_t *context_registers;
	uint32_t num_sh_reg, num_context_reg;

	if (ps_type == PS_CONST) {
		if (version == 9) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
			num_sh_reg = ps_num_sh_registers_gfx9;
		} else if (version == 10) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
			num_sh_reg = ps_num_sh_registers_gfx10;
		}
		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	} else if (ps_type == PS_TEX) {
		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	}

	i = 0;

	if (version == 9) {
		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
		   0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		/* the multiplier 9 comes from SPI_SHADER_COL_FORMAT */
		shader_addr += 256 * 9;
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
		ptr[i++] = 0x7;
		ptr[i++] = 0xffff;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;
	} else if (version == 10) {
		shader_addr += 256 * 9;
		/* 0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
		ptr[i++] = 0x8;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;

		/* mmSPI_SHADER_PGM_RSRC3_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000007;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000001;
		ptr[i++] = 0xffff;
	}

	for (j = 0; j < num_sh_reg; j++) {
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = sh_registers[j * 2] - 0x2c00;
		ptr[i++] = sh_registers[j * 2 + 1];
	}

	for (j = 0; j < num_context_reg; j++) {
		if (context_registers[j * 2] != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = context_registers[j * 2] - 0xa000;
			ptr[i++] = context_registers[j * 2 + 1];
		}

		if (context_registers[j * 2] == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	return i;
}

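/*
 * Emit the draw itself: primitive setup for the given gfx version followed
 * by a 3-vertex DRAW_INDEX_AUTO (VGT_PRIMITIVE_TYPE 0x11, a rect list).
 * Returns the number of dwords written.
 */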
static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version)
{
	int i = 0;

	if (version == 9) {
		/* mmIA_MULTI_VGT_PARAM */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x40000258;
		ptr[i++] = 0xd00ff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x10000242;
		ptr[i++] = 0x11;
	} else if (version == 10) {
		/* mmGE_CNTL */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x25b;
		ptr[i++] = 0xff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x242;
		ptr[i++] = 0x11;
	}

	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
	ptr[i++] = 3;
	ptr[i++] = 2;

	return i;
}

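/*
 * Fill a 16 KiB VRAM buffer with 0x33 by drawing a constant-color rectangle
 * with the supplied shaders, then read the buffer back through the CPU
 * mapping and verify every byte.
 */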
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id, uint32_t version)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
					AMDGPU_GEM_DOMAIN_GTT, 0,
					&bo_cmd, (void **)&ptr_cmd,
					&mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version);

	/* four dwords of the 0x33 fill pattern, passed via the PS user-data SGPRs */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify the memset result matches the expected pattern */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

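/*
 * Set up the PS/VS shader buffers for the memset draw, run it on the given
 * ring, and release the shader BOs afterwards.
 */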
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring, int version)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs,
			ring, version);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

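/*
 * Copy a 16 KiB buffer on the GFX ring by drawing a textured rectangle
 * that samples the source buffer into the destination. In hang mode the PS
 * is the hanging variant, and the test expects a context reset instead of
 * a byte-exact copy.
 */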
static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int version, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

	/* texture descriptor for the source buffer, in the PS user-data SGPRs */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x7c01f;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x3e000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x8007c007;
		ptr_cmd[i++] = 0x90500fac;
		i += 2;
		ptr_cmd[i++] = 0x400;
		i++;
	}

	/* sampler descriptor in the next four PS user-data SGPRs */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* mmSPI_PS_INPUT_CNTL_0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify the memcpy result matches the source pattern */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

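/*
 * Allocate and load the PS/VS shaders (the hanging PS when hang is set),
 * run the memcpy draw on the given ring, and free the shader BOs.
 */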
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int version, int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			mc_address_shader_ps, mc_address_shader_vs,
			ring, version, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

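/*
 * CUnit entry point: run the memset and memcpy draw tests on every
 * available GFX ring of gfx9 and gfx10 hardware.
 */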
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id, version;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no graphics ring\n");
		return;
	}

	version = info.hw_ip_version_major;
	if (version != 9 && version != 10) {
		printf("SKIP ... unsupported gfx version %d\n", version);
		return;
	}

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id, version);
		amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0);
	}
}

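/*
 * Slow-hang variant of the memcpy draw: a 64 MiB copy driven by the
 * artificially long pixel shader. The fence result is ignored because the
 * submission is expected to hang; the test only checks that the context
 * reports AMDGPU_CTX_UNKNOWN_RESET.
 */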
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_ps, &ptr_shader_ps,
					&mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_shader_vs, &ptr_shader_vs,
					&mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_src, (void **)&ptr_src,
					&mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					&bo_dst, (void **)&ptr_dst,
					&mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
							mc_address_shader_vs, version, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

	/* texture descriptor covering the oversized source buffer */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);

	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x1ffcfff;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x1ffe000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x81ffc1ff;
		ptr_cmd[i++] = 0x90500fac;
		i += 4;
	}

	/* sampler descriptor */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	/* mmSPI_PS_INPUT_CNTL_0 */
	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete; a hang and reset are expected here */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

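/*
 * Trigger a GPU reset through the amdgpu_gpu_recover debugfs file, verify
 * the context observes the reset, then rerun the dispatch tests to confirm
 * the GPU came back in a usable state.
 */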
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	/* reading this debugfs file triggers a GPU recovery */
	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}

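/*
 * Verify the stable-pstate context ops: the default pstate should be NONE,
 * and after setting PEAK the queried value should read back as PEAK.
 */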
static void amdgpu_stable_pstate_test(void)
{
	int r;
	amdgpu_context_handle context_handle;
	uint32_t current_pstate = 0, new_pstate = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &current_pstate);
	CU_ASSERT_EQUAL(r, 0);
	/* check the value actually queried, not the still-zero new_pstate */
	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &new_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
4330