/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;
static uint32_t chip_id;
static uint32_t chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask 0x000000FF
#define SDMA_PKT_HEADER_op_shift 0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL 11
# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14)
/* 0 = byte fill
 * 2 = DW fill
 */
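/*
 * Illustrative sketch, not part of the test code: the const-fill helper
 * elsewhere in this file composes a dword fill with SDMA_PACKET() (defined
 * just below); dst_mc and byte_count are placeholder names:
 *
 *   pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
 *			    SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
 *   pm4[i++] = 0xffffffff & dst_mc;		// destination address low
 *   pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 *   pm4[i++] = 0xdeadbeaf;			// fill pattern
 *   pm4[i++] = byte_count;			// length (count - 1 on >= FAMILY_AI)
 */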
#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
				    (((sub_op) & 0xFF) << 8) | \
				    (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE 2
# define SDMA_WRITE_SUB_OPCODE_LINEAR 0
# define SDMA_WRITE_SUB_OPCODE_TILED 1
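/*
 * Illustrative sketch: the write-linear helper below builds a non-SI SDMA
 * write packet with SDMA_PACKET(), which packs op into bits [7:0], sub_op
 * into bits [15:8] and the opcode-specific extra field into bits [31:16];
 * dst_mc and n are placeholder names:
 *
 *   pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 *   pm4[i++] = 0xfffffffc & dst_mc;		// destination address low
 *   pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 *   pm4[i++] = n - 1;				// dword count - 1 (>= FAMILY_AI)
 *   // ... n payload dwords follow ...
 */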

#define SDMA_OPCODE_COPY 1
# define SDMA_COPY_SUB_OPCODE_LINEAR 0

#define SDMA_OPCODE_ATOMIC 10
# define SDMA_ATOMIC_LOOP(x) ((x) << 0)
/* 0 - single_pass_atomic.
 * 1 - loop_until_compare_satisfied.
 */
# define SDMA_ATOMIC_TMZ(x) ((x) << 2)
/* 0 - non-TMZ.
 * 1 - TMZ.
 */
# define SDMA_ATOMIC_OPCODE(x) ((x) << 9)
/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
 * same as Packet 3
 */

#define GFX_COMPUTE_NOP 0xffff1000
#define SDMA_NOP 0x0

/* PM4 */
#define PACKET_TYPE0 0
#define PACKET_TYPE1 1
#define PACKET_TYPE2 2
#define PACKET_TYPE3 3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
			 ((reg) & 0xFFFF) | \
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2 0x80000000
#define PACKET2_PAD_SHIFT 0
#define PACKET2_PAD_MASK (0x3fffffff << 0)

#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
			(((op) & 0xFF) << 8) | \
			((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)

/* Packet 3 types */
#define PACKET3_NOP 0x10
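/*
 * Illustrative sketch: PACKET3(PACKET3_NOP, 14) builds the type-3 header
 * 0xC00E1000 (type 3 in bits [31:30], count 14 in bits [29:16], opcode 0x10
 * in bits [15:8]); it is used exactly that way in
 * amdgpu_command_submission_compute_nop() below. The CP_PACKET_GET_*
 * helpers above decode such a header:
 *
 *   CP_PACKET_GET_TYPE(0xC00E1000)    == 3
 *   CP_PACKET_GET_COUNT(0xC00E1000)   == 14
 *   CP_PACKET3_GET_OPCODE(0xC00E1000) == PACKET3_NOP
 */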

#define PACKET3_WRITE_DATA 0x37
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
/* 0 - register
 * 1 - memory (sync - via GRBM)
 * 2 - gl2
 * 3 - gds
 * 4 - reserved
 * 5 - memory (async - direct)
 */
#define WR_ONE_ADDR (1 << 16)
#define WR_CONFIRM (1 << 20)
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
/* 0 - LRU
 * 1 - Stream
 */
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
/* 0 - me
 * 1 - pfp
 * 2 - ce
 */
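/*
 * Illustrative sketch: the write-linear helper below emits a CP write of n
 * dwords to memory as (dst_mc is a placeholder name):
 *
 *   pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + n);
 *   pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;	// async direct memory
 *   pm4[i++] = 0xfffffffc & dst_mc;			// destination low
 *   pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 *   // ... n payload dwords follow ...
 */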

#define PACKET3_ATOMIC_MEM 0x1E
#define TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
#define ATOMIC_MEM_COMMAND(x) ((x) << 8)
/* 0 - single_pass_atomic.
 * 1 - loop_until_compare_satisfied.
 */
#define ATOMIC_MEM_CACHEPOLICY(x) ((x) << 25)
/* 0 - lru.
 * 1 - stream.
 */
#define ATOMIC_MEM_ENGINESEL(x) ((x) << 30)
/* 0 - micro_engine.
 */
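/*
 * Illustrative sketch: the secure write-linear test below issues a looping
 * 32-bit compare-swap through this packet as
 *
 *   pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
 *   pm4[i++] = TC_OP_ATOMIC_CMPSWAP_RTN_32 | ATOMIC_MEM_COMMAND(1) |
 *		ATOMIC_MEM_CACHEPOLICY(0) | ATOMIC_MEM_ENGINESEL(0);
 *   // then: addr_lo, addr_hi, src_lo, src_hi, cmp_lo, cmp_hi, loop_interval
 */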

#define PACKET3_DMA_DATA 0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
/* 0 - ME
 * 1 - PFP
 */
# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
/* 0 - LRU
 * 1 - Stream
 * 2 - Bypass
 */
# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
/* 0 - DST_ADDR using DAS
 * 1 - GDS
 * 3 - DST_ADDR using L2
 */
# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
/* 0 - LRU
 * 1 - Stream
 * 2 - Bypass
 */
# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
/* 0 - SRC_ADDR using SAS
 * 1 - GDS
 * 2 - DATA
 * 3 - SRC_ADDR using L2
 */
# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
/* COMMAND */
# define PACKET3_DMA_DATA_DIS_WC (1 << 21)
# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
/* 0 - none
 * 1 - 8 in 16
 * 2 - 8 in 32
 * 3 - 8 in 64
 */
# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
/* 0 - none
 * 1 - 8 in 16
 * 2 - 8 in 32
 * 3 - 8 in 64
 */
# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
/* 0 - memory
 * 1 - register
 */
# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
/* 0 - memory
 * 1 - register
 */
# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt) ((((op) & 0xF) << 28) | \
					  (((b) & 0x1) << 26) | \
					  (((t) & 0x1) << 23) | \
					  (((s) & 0x1) << 22) | \
					  (((cnt) & 0xFFFFF) << 0))
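/*
 * Illustrative sketch: on SI the eviction test below builds its copy packet
 * as (SDMA_OPCODE_COPY_SI is defined just below; dst_mc/src_mc are
 * placeholder names; the count field occupies bits [19:0] of the header):
 *
 *   pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, byte_count);
 *   pm4[i++] = 0xffffffff & dst_mc;		// destination low
 *   pm4[i++] = 0xffffffff & src_mc;		// source low
 *   pm4[i++] = (0xffffffff00000000 & dst_mc) >> 32;
 *   pm4[i++] = (0xffffffff00000000 & src_mc) >> 32;
 */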
#define SDMA_OPCODE_COPY_SI 3
#define SDMA_OPCODE_CONSTANT_FILL_SI 13
#define SDMA_NOP_SI 0xf
#define GFX_COMPUTE_NOP_SI 0x80000000
#define PACKET3_DMA_DATA_SI 0x41
# define PACKET3_DMA_DATA_SI_ENGINE(x) ((x) << 27)
/* 0 - ME
 * 1 - PFP
 */
# define PACKET3_DMA_DATA_SI_DST_SEL(x) ((x) << 20)
/* 0 - DST_ADDR using DAS
 * 1 - GDS
 * 3 - DST_ADDR using L2
 */
# define PACKET3_DMA_DATA_SI_SRC_SEL(x) ((x) << 29)
/* 0 - SRC_ADDR using SAS
 * 1 - GDS
 * 2 - DATA
 * 3 - SRC_ADDR using L2
 */
# define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31)


#define PKT3_CONTEXT_CONTROL 0x28
#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE 0x12

#define PKT3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00

#define PKT3_SET_SH_REG_INDEX 0x9B

#define PACKET3_DISPATCH_DIRECT 0x15
#define PACKET3_EVENT_WRITE 0x46
#define PACKET3_ACQUIRE_MEM 0x58
#define PACKET3_SET_CONTEXT_REG 0x69
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_DRAW_INDEX_AUTO 0x2D
/* gfx 8 */
#define mmCOMPUTE_PGM_LO 0x2e0c
#define mmCOMPUTE_PGM_RSRC1 0x2e12
#define mmCOMPUTE_TMPRING_SIZE 0x2e18
#define mmCOMPUTE_USER_DATA_0 0x2e40
#define mmCOMPUTE_USER_DATA_1 0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15
#define mmCOMPUTE_NUM_THREAD_X 0x2e07


#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
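/*
 * Illustrative sketch: SWAP_32() reverses the byte order of a dword, e.g.
 * SWAP_32(0x12345678) == 0x78563412; it is used below when initializing
 * shader_bin[].
 */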

/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000000},	//{ mmSPI_PS_IN_CONTROL, 0x00000000 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_const_shader_gfx10[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
	 { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx10 = 2;

static const uint32_t ps_const_sh_registers_gfx10[][2] = {
	{0x2C0A, 0x000C0000},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000001},	//{ mmSPI_PS_IN_CONTROL, 0x00000001 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_tex_shader_gfx10[] = {
	0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
	0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
	0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
	0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
	0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
	 { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
	}
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
	0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
	0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
	0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
	0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
	0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
	0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
	0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
	0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_nv_codes[] = {
	0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
	memcpy_cs_hang_slow_nv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t alloc_flags,
			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
			uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}

CU_BOOL suite_basic_tests_enable(void)
{
	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the gfx engine basic test cases on ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (GFX)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (Multi-Fence)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Sync dependency Test",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 4 loops to test all mapping-flag combinations */
	while(loop1 < 2) {
		while(loop2 < 2) {
			/* allocate UC bo1 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for SDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA copy produced the expected pattern */
			i = 0;
			while(i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);

	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request; this helper submits the command stream described in
 * ibs_request and waits for the IB to complete.
 */
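/*
 * Usage sketch (see amdgpu_bo_eviction_test() above for a real caller):
 *
 *   pm4 = calloc(pm4_dw, sizeof(*pm4));
 *   ib_info = calloc(1, sizeof(*ib_info));
 *   ibs_request = calloc(1, sizeof(*ibs_request));
 *   // ... fill pm4[0..i-1] with packets, collect res_cnt BOs in resources ...
 *   amdgpu_test_exec_cs_helper_raw(device_handle, context_handle, ip_type,
 *				    ring, i, pm4, res_cnt, resources,
 *				    ib_info, ibs_request, false);
 *   free(ibs_request); free(ib_info); free(pm4);
 */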
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait until the IB completes */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}

void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,
							  unsigned ip_type,
							  bool secure)
1552 {
1553 const int sdma_write_length = 128;
1554 const int pm4_dw = 256;
1555 amdgpu_context_handle context_handle;
1556 amdgpu_bo_handle bo;
1557 amdgpu_bo_handle *resources;
1558 uint32_t *pm4;
1559 struct amdgpu_cs_ib_info *ib_info;
1560 struct amdgpu_cs_request *ibs_request;
1561 uint64_t bo_mc;
1562 volatile uint32_t *bo_cpu;
1563 uint32_t bo_cpu_origin;
1564 int i, j, r, loop, ring_id;
1565 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1566 amdgpu_va_handle va_handle;
1567 struct drm_amdgpu_info_hw_ip hw_ip_info;
1568
1569 pm4 = calloc(pm4_dw, sizeof(*pm4));
1570 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1571
1572 ib_info = calloc(1, sizeof(*ib_info));
1573 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1574
1575 ibs_request = calloc(1, sizeof(*ibs_request));
1576 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1577
1578 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1579 CU_ASSERT_EQUAL(r, 0);
1580
1581 for (i = 0; secure && (i < 2); i++)
1582 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1583
1584 r = amdgpu_cs_ctx_create(device, &context_handle);
1585
1586 CU_ASSERT_EQUAL(r, 0);
1587
1588 /* prepare resource */
1589 resources = calloc(1, sizeof(amdgpu_bo_handle));
1590 CU_ASSERT_NOT_EQUAL(resources, NULL);
1591
1592 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1593 loop = 0;
1594 while(loop < 2) {
1595 /* allocate UC bo for sDMA use */
1596 r = amdgpu_bo_alloc_and_map(device,
1597 sdma_write_length * sizeof(uint32_t),
1598 4096, AMDGPU_GEM_DOMAIN_GTT,
1599 gtt_flags[loop], &bo, (void**)&bo_cpu,
1600 &bo_mc, &va_handle);
1601 CU_ASSERT_EQUAL(r, 0);
1602
1603 /* clear bo */
1604 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1605
1606 resources[0] = bo;
1607
1608 /* fulfill PM4: test DMA write-linear */
1609 i = j = 0;
1610 if (ip_type == AMDGPU_HW_IP_DMA) {
1611 if (family_id == AMDGPU_FAMILY_SI)
1612 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1613 sdma_write_length);
1614 else
1615 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1616 SDMA_WRITE_SUB_OPCODE_LINEAR,
1617 secure ? SDMA_ATOMIC_TMZ(1) : 0);
1618 pm4[i++] = 0xfffffffc & bo_mc;
1619 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1620 if (family_id >= AMDGPU_FAMILY_AI)
1621 pm4[i++] = sdma_write_length - 1;
1622 else if (family_id != AMDGPU_FAMILY_SI)
1623 pm4[i++] = sdma_write_length;
1624 while(j++ < sdma_write_length)
1625 pm4[i++] = 0xdeadbeaf;
1626 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1627 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
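/* PACKET3_WRITE_DATA: a control dword (DST_SEL(5) targets memory,
 * WR_CONFIRM waits for the write to land), the destination address
 * low/high, then the payload dwords.
 */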
1628 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1629 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1630 pm4[i++] = 0xfffffffc & bo_mc;
1631 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1632 while(j++ < sdma_write_length)
1633 pm4[i++] = 0xdeadbeaf;
1634 }
1635
1636 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1637 ip_type, ring_id, i, pm4,
1638 1, resources, ib_info,
1639 ibs_request, secure);
1640
1641 /* verify the SDMA write result matches the expected pattern */
1642 i = 0;
1643 if (!secure) {
1644 while(i < sdma_write_length) {
1645 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1646 }
1647 } else if (ip_type == AMDGPU_HW_IP_GFX) {
1648 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1649 pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1650 /* ATOMIC_MEM packet: 32-bit compare-and-swap with return
1651 * (TC_OP_ATOMIC_CMPSWAP_RTN_32),
1652 * command 1 - loop until the compare is satisfied,
1653 * cache policy 0 - LRU, engine_sel 0 - micro engine
1654 */
1655 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1656 ATOMIC_MEM_COMMAND(1) |
1657 ATOMIC_MEM_CACHEPOLICAY(0) |
1658 ATOMIC_MEM_ENGINESEL(0));
1659 pm4[i++] = 0xfffffffc & bo_mc;
1660 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1661 pm4[i++] = 0x12345678;
1662 pm4[i++] = 0x0;
1663 pm4[i++] = 0xdeadbeaf;
1664 pm4[i++] = 0x0;
1665 pm4[i++] = 0x100;
1666 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1667 ip_type, ring_id, i, pm4,
1668 1, resources, ib_info,
1669 ibs_request, true);
1670 } else if (ip_type == AMDGPU_HW_IP_DMA) {
1671 /* restore the bo_cpu to compare */
1672 bo_cpu_origin = bo_cpu[0];
1673 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1674 /* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1675 * loop, 1-loop_until_compare_satisfied.
1676 * single_pass_atomic, 0-lru
1677 */
1678 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1679 0,
1680 SDMA_ATOMIC_LOOP(1) |
1681 SDMA_ATOMIC_TMZ(1) |
1682 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1683 pm4[i++] = 0xfffffffc & bo_mc;
1684 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1685 pm4[i++] = 0x12345678;
1686 pm4[i++] = 0x0;
1687 pm4[i++] = 0xdeadbeaf;
1688 pm4[i++] = 0x0;
1689 pm4[i++] = 0x100;
1690 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1691 ip_type, ring_id, i, pm4,
1692 1, resources, ib_info,
1693 ibs_request, true);
1694 /* The DMA engine's atomic behavior differs from GFX.
1695 * If the compare data does not equal the destination data:
1696 * GFX loops again until the engine times out (system hang);
1697 * DMA loops again until a timer expires and then sends an interrupt,
1698 * so the testcase can't use the interrupt mechanism.
1699 * We verify through memory instead: when the compare data equals
1700 * the destination data, the source data is swapped into the
1701 * destination buffer; otherwise the destination stays unchanged.
1702 * So if bo_cpu[0] has been overwritten, the atomic succeeded.
1703 */
1704 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1705
1706 /* compare again for the case of dest_data != cmp_data */
1707 i = 0;
1708 /* restore again; dest_data should now be 0x12345678 */
1709 bo_cpu_origin = bo_cpu[0];
1710 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1711 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1712 0,
1713 SDMA_ATOMIC_LOOP(1) |
1714 SDMA_ATOMIC_TMZ(1) |
1715 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1716 pm4[i++] = 0xfffffffc & bo_mc;
1717 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1718 pm4[i++] = 0x87654321;
1719 pm4[i++] = 0x0;
1720 pm4[i++] = 0xdeadbeaf;
1721 pm4[i++] = 0x0;
1722 pm4[i++] = 0x100;
1723 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1724 ip_type, ring_id, i, pm4,
1725 1, resources, ib_info,
1726 ibs_request, true);
1727 /* bo_cpu[0] should be unchanged, still 0x12345678; otherwise the test failed */
1728 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1729 }
1730
1731 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1732 sdma_write_length * sizeof(uint32_t));
1733 CU_ASSERT_EQUAL(r, 0);
1734 loop++;
1735 }
1736 }
1737 /* clean resources */
1738 free(resources);
1739 free(ibs_request);
1740 free(ib_info);
1741 free(pm4);
1742
1743 /* end of test */
1744 r = amdgpu_cs_ctx_free(context_handle);
1745 CU_ASSERT_EQUAL(r, 0);
1746 }
1747
1748 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1749 {
1750 amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1751 ip_type,
1752 false);
1753 }
1754
1755 static void amdgpu_command_submission_sdma_write_linear(void)
1756 {
1757 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1758 }
1759
1760 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1761 {
1762 const int sdma_write_length = 1024 * 1024;
1763 const int pm4_dw = 256;
1764 amdgpu_context_handle context_handle;
1765 amdgpu_bo_handle bo;
1766 amdgpu_bo_handle *resources;
1767 uint32_t *pm4;
1768 struct amdgpu_cs_ib_info *ib_info;
1769 struct amdgpu_cs_request *ibs_request;
1770 uint64_t bo_mc;
1771 volatile uint32_t *bo_cpu;
1772 int i, j, r, loop, ring_id;
1773 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1774 amdgpu_va_handle va_handle;
1775 struct drm_amdgpu_info_hw_ip hw_ip_info;
1776
1777 pm4 = calloc(pm4_dw, sizeof(*pm4));
1778 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1779
1780 ib_info = calloc(1, sizeof(*ib_info));
1781 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1782
1783 ibs_request = calloc(1, sizeof(*ibs_request));
1784 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1785
1786 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1787 CU_ASSERT_EQUAL(r, 0);
1788
1789 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1790 CU_ASSERT_EQUAL(r, 0);
1791
1792 /* prepare resource */
1793 resources = calloc(1, sizeof(amdgpu_bo_handle));
1794 CU_ASSERT_NOT_EQUAL(resources, NULL);
1795
1796 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1797 loop = 0;
1798 while(loop < 2) {
1799 /* allocate UC bo for sDMA use */
1800 r = amdgpu_bo_alloc_and_map(device_handle,
1801 sdma_write_length, 4096,
1802 AMDGPU_GEM_DOMAIN_GTT,
1803 gtt_flags[loop], &bo, (void**)&bo_cpu,
1804 &bo_mc, &va_handle);
1805 CU_ASSERT_EQUAL(r, 0);
1806
1807 /* clear bo */
1808 memset((void*)bo_cpu, 0, sdma_write_length);
1809
1810 resources[0] = bo;
1811
1812 /* fill the PM4 buffer: test DMA const fill */
1813 i = j = 0;
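/* SDMA CONSTANT_FILL layout: header (EXTRA_SIZE selects the fill
 * pattern width), destination address low/high, the 32-bit fill
 * value, then the byte count (count - 1 on AMDGPU_FAMILY_AI and
 * newer); SI packs the dword count into the packet header.
 */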
1814 if (ip_type == AMDGPU_HW_IP_DMA) {
1815 if (family_id == AMDGPU_FAMILY_SI) {
1816 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1817 0, 0, 0,
1818 sdma_write_length / 4);
1819 pm4[i++] = 0xfffffffc & bo_mc;
1820 pm4[i++] = 0xdeadbeaf;
1821 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1822 } else {
1823 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1824 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1825 pm4[i++] = 0xffffffff & bo_mc;
1826 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1827 pm4[i++] = 0xdeadbeaf;
1828 if (family_id >= AMDGPU_FAMILY_AI)
1829 pm4[i++] = sdma_write_length - 1;
1830 else
1831 pm4[i++] = sdma_write_length;
1832 }
1833 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1834 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1835 if (family_id == AMDGPU_FAMILY_SI) {
1836 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1837 pm4[i++] = 0xdeadbeaf;
1838 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1839 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1840 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1841 PACKET3_DMA_DATA_SI_CP_SYNC;
1842 pm4[i++] = 0xffffffff & bo_mc;
1843 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1844 pm4[i++] = sdma_write_length;
1845 } else {
1846 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1847 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1848 PACKET3_DMA_DATA_DST_SEL(0) |
1849 PACKET3_DMA_DATA_SRC_SEL(2) |
1850 PACKET3_DMA_DATA_CP_SYNC;
1851 pm4[i++] = 0xdeadbeaf;
1852 pm4[i++] = 0;
1853 pm4[i++] = 0xfffffffc & bo_mc;
1854 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1855 pm4[i++] = sdma_write_length;
1856 }
1857 }
1858
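/* On the GFX/compute rings the fill goes through CP DMA:
 * SRC_SEL(2) treats the data dword as the fill pattern and
 * DST_SEL(0) writes it out to memory.
 */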
1859 amdgpu_test_exec_cs_helper(context_handle,
1860 ip_type, ring_id,
1861 i, pm4,
1862 1, resources,
1863 ib_info, ibs_request);
1864
1865 /* verify the SDMA const-fill result matches the expected pattern */
1866 i = 0;
1867 while(i < (sdma_write_length / 4)) {
1868 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1869 }
1870
1871 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1872 sdma_write_length);
1873 CU_ASSERT_EQUAL(r, 0);
1874 loop++;
1875 }
1876 }
1877 /* clean resources */
1878 free(resources);
1879 free(ibs_request);
1880 free(ib_info);
1881 free(pm4);
1882
1883 /* end of test */
1884 r = amdgpu_cs_ctx_free(context_handle);
1885 CU_ASSERT_EQUAL(r, 0);
1886 }
1887
1888 static void amdgpu_command_submission_sdma_const_fill(void)
1889 {
1890 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1891 }
1892
1893 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1894 {
1895 const int sdma_write_length = 1024;
1896 const int pm4_dw = 256;
1897 amdgpu_context_handle context_handle;
1898 amdgpu_bo_handle bo1, bo2;
1899 amdgpu_bo_handle *resources;
1900 uint32_t *pm4;
1901 struct amdgpu_cs_ib_info *ib_info;
1902 struct amdgpu_cs_request *ibs_request;
1903 uint64_t bo1_mc, bo2_mc;
1904 volatile unsigned char *bo1_cpu, *bo2_cpu;
1905 int i, j, r, loop1, loop2, ring_id;
1906 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1907 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1908 struct drm_amdgpu_info_hw_ip hw_ip_info;
1909
1910 pm4 = calloc(pm4_dw, sizeof(*pm4));
1911 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1912
1913 ib_info = calloc(1, sizeof(*ib_info));
1914 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1915
1916 ibs_request = calloc(1, sizeof(*ibs_request));
1917 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1918
1919 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1920 CU_ASSERT_EQUAL(r, 0);
1921
1922 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1923 CU_ASSERT_EQUAL(r, 0);
1924
1925 /* prepare resource */
1926 resources = calloc(2, sizeof(amdgpu_bo_handle));
1927 CU_ASSERT_NOT_EQUAL(resources, NULL);
1928
1929 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1930 loop1 = 0;
1931 /* run 4 rounds to test all mapping combinations */
1932 while(loop1 < 2) {
1933 for (loop2 = 0; loop2 < 2; loop2++) {
1934 /* allocate UC bo1 for sDMA use */
1935 r = amdgpu_bo_alloc_and_map(device_handle,
1936 sdma_write_length, 4096,
1937 AMDGPU_GEM_DOMAIN_GTT,
1938 gtt_flags[loop1], &bo1,
1939 (void**)&bo1_cpu, &bo1_mc,
1940 &bo1_va_handle);
1941 CU_ASSERT_EQUAL(r, 0);
1942
1943 /* set bo1 */
1944 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1945
1946 /* allocate UC bo2 for sDMA use */
1947 r = amdgpu_bo_alloc_and_map(device_handle,
1948 sdma_write_length, 4096,
1949 AMDGPU_GEM_DOMAIN_GTT,
1950 gtt_flags[loop2], &bo2,
1951 (void**)&bo2_cpu, &bo2_mc,
1952 &bo2_va_handle);
1953 CU_ASSERT_EQUAL(r, 0);
1954
1955 /* clear bo2 */
1956 memset((void*)bo2_cpu, 0, sdma_write_length);
1957
1958 resources[0] = bo1;
1959 resources[1] = bo2;
1960
1961 /* fill the PM4 buffer: test DMA copy linear */
1962 i = j = 0;
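/* SDMA COPY_LINEAR layout: header, byte count (count - 1 on
 * AMDGPU_FAMILY_AI and newer), a parameter dword, then source and
 * destination addresses (low/high each); SI keeps the count in the
 * header and orders the address words differently.
 */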
1963 if (ip_type == AMDGPU_HW_IP_DMA) {
1964 if (family_id == AMDGPU_FAMILY_SI) {
1965 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1966 0, 0, 0,
1967 sdma_write_length);
1968 pm4[i++] = 0xffffffff & bo2_mc;
1969 pm4[i++] = 0xffffffff & bo1_mc;
1970 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1971 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1972 } else {
1973 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1974 SDMA_COPY_SUB_OPCODE_LINEAR,
1975 0);
1976 if (family_id >= AMDGPU_FAMILY_AI)
1977 pm4[i++] = sdma_write_length - 1;
1978 else
1979 pm4[i++] = sdma_write_length;
1980 pm4[i++] = 0;
1981 pm4[i++] = 0xffffffff & bo1_mc;
1982 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1983 pm4[i++] = 0xffffffff & bo2_mc;
1984 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985 }
1986 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1987 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1988 if (family_id == AMDGPU_FAMILY_SI) {
1989 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1990 pm4[i++] = 0xfffffffc & bo1_mc;
1991 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1992 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1993 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1994 PACKET3_DMA_DATA_SI_CP_SYNC |
1995 (0xffff00000000 & bo1_mc) >> 32;
1996 pm4[i++] = 0xfffffffc & bo2_mc;
1997 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1998 pm4[i++] = sdma_write_length;
1999 } else {
2000 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2001 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2002 PACKET3_DMA_DATA_DST_SEL(0) |
2003 PACKET3_DMA_DATA_SRC_SEL(0) |
2004 PACKET3_DMA_DATA_CP_SYNC;
2005 pm4[i++] = 0xfffffffc & bo1_mc;
2006 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2007 pm4[i++] = 0xfffffffc & bo2_mc;
2008 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2009 pm4[i++] = sdma_write_length;
2010 }
2011 }
2012
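/* On GFX/compute the copy uses CP DMA with SRC_SEL(0)/DST_SEL(0),
 * i.e. a plain memory-to-memory copy of sdma_write_length bytes.
 */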
2013 amdgpu_test_exec_cs_helper(context_handle,
2014 ip_type, ring_id,
2015 i, pm4,
2016 2, resources,
2017 ib_info, ibs_request);
2018
2019 /* verify the SDMA copy result matches the source pattern */
2020 i = 0;
2021 while(i < sdma_write_length) {
2022 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2023 }
2024 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2025 sdma_write_length);
2026 CU_ASSERT_EQUAL(r, 0);
2027 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2028 sdma_write_length);
2029 CU_ASSERT_EQUAL(r, 0);
2031 }
2032 loop1++;
2033 }
2034 }
2035 /* clean resources */
2036 free(resources);
2037 free(ibs_request);
2038 free(ib_info);
2039 free(pm4);
2040
2041 /* end of test */
2042 r = amdgpu_cs_ctx_free(context_handle);
2043 CU_ASSERT_EQUAL(r, 0);
2044 }
2045
2046 static void amdgpu_command_submission_sdma_copy_linear(void)
2047 {
2048 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2049 }
2050
2051 static void amdgpu_command_submission_sdma(void)
2052 {
2053 amdgpu_command_submission_sdma_write_linear();
2054 amdgpu_command_submission_sdma_const_fill();
2055 amdgpu_command_submission_sdma_copy_linear();
2056 }
2057
2058 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2059 {
2060 amdgpu_context_handle context_handle;
2061 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2062 void *ib_result_cpu, *ib_result_ce_cpu;
2063 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2064 struct amdgpu_cs_request ibs_request[2] = {0};
2065 struct amdgpu_cs_ib_info ib_info[2];
2066 struct amdgpu_cs_fence fence_status[2] = {0};
2067 uint32_t *ptr;
2068 uint32_t expired;
2069 amdgpu_bo_list_handle bo_list;
2070 amdgpu_va_handle va_handle, va_handle_ce;
2071 int r;
2072 int i = 0, ib_cs_num = 2;
2073
2074 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2075 CU_ASSERT_EQUAL(r, 0);
2076
2077 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2078 AMDGPU_GEM_DOMAIN_GTT, 0,
2079 &ib_result_handle, &ib_result_cpu,
2080 &ib_result_mc_address, &va_handle);
2081 CU_ASSERT_EQUAL(r, 0);
2082
2083 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2084 AMDGPU_GEM_DOMAIN_GTT, 0,
2085 &ib_result_ce_handle, &ib_result_ce_cpu,
2086 &ib_result_ce_mc_address, &va_handle_ce);
2087 CU_ASSERT_EQUAL(r, 0);
2088
2089 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2090 ib_result_ce_handle, &bo_list);
2091 CU_ASSERT_EQUAL(r, 0);
2092
2093 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2094
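/* Each submission pairs a constant engine (CE) IB with a draw
 * engine (DE) IB: the CE IB programs the CE/DE counters and the DE
 * IB issues WAIT_ON_CE_COUNTER, so the DE stalls until the CE has
 * caught up.
 */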
2095 /* IT_SET_CE_DE_COUNTERS */
2096 ptr = ib_result_ce_cpu;
2097 if (family_id != AMDGPU_FAMILY_SI) {
2098 ptr[i++] = 0xc0008900;
2099 ptr[i++] = 0;
2100 }
2101 ptr[i++] = 0xc0008400;
2102 ptr[i++] = 1;
2103 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2104 ib_info[0].size = i;
2105 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2106
2107 /* IT_WAIT_ON_CE_COUNTER */
2108 ptr = ib_result_cpu;
2109 ptr[0] = 0xc0008600;
2110 ptr[1] = 0x00000001;
2111 ib_info[1].ib_mc_address = ib_result_mc_address;
2112 ib_info[1].size = 2;
2113
2114 for (i = 0; i < ib_cs_num; i++) {
2115 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2116 ibs_request[i].number_of_ibs = 2;
2117 ibs_request[i].ibs = ib_info;
2118 ibs_request[i].resources = bo_list;
2119 ibs_request[i].fence_info.handle = NULL;
2120 }
2121
2122 r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
2123
2124 CU_ASSERT_EQUAL(r, 0);
2125
2126 for (i = 0; i < ib_cs_num; i++) {
2127 fence_status[i].context = context_handle;
2128 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2129 fence_status[i].fence = ibs_request[i].seq_no;
2130 }
2131
2132 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2133 AMDGPU_TIMEOUT_INFINITE,
2134 &expired, NULL);
2135 CU_ASSERT_EQUAL(r, 0);
2136
2137 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2138 ib_result_mc_address, 4096);
2139 CU_ASSERT_EQUAL(r, 0);
2140
2141 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2142 ib_result_ce_mc_address, 4096);
2143 CU_ASSERT_EQUAL(r, 0);
2144
2145 r = amdgpu_bo_list_destroy(bo_list);
2146 CU_ASSERT_EQUAL(r, 0);
2147
2148 r = amdgpu_cs_ctx_free(context_handle);
2149 CU_ASSERT_EQUAL(r, 0);
2150 }
2151
2152 static void amdgpu_command_submission_multi_fence(void)
2153 {
2154 amdgpu_command_submission_multi_fence_wait_all(true);
2155 amdgpu_command_submission_multi_fence_wait_all(false);
2156 }
2157
2158 static void amdgpu_userptr_test(void)
2159 {
2160 int i, r, j;
2161 uint32_t *pm4 = NULL;
2162 uint64_t bo_mc;
2163 void *ptr = NULL;
2164 int pm4_dw = 256;
2165 int sdma_write_length = 4;
2166 amdgpu_bo_handle handle;
2167 amdgpu_context_handle context_handle;
2168 struct amdgpu_cs_ib_info *ib_info;
2169 struct amdgpu_cs_request *ibs_request;
2170 amdgpu_bo_handle buf_handle;
2171 amdgpu_va_handle va_handle;
2172
2173 pm4 = calloc(pm4_dw, sizeof(*pm4));
2174 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2175
2176 ib_info = calloc(1, sizeof(*ib_info));
2177 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2178
2179 ibs_request = calloc(1, sizeof(*ibs_request));
2180 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2181
2182 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2183 CU_ASSERT_EQUAL(r, 0);
2184
2185 r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2186 CU_ASSERT_EQUAL(r, 0);
2187 memset(ptr, 0, BUFFER_SIZE);
2188
2189 r = amdgpu_create_bo_from_user_mem(device_handle,
2190 ptr, BUFFER_SIZE, &buf_handle);
2191 CU_ASSERT_EQUAL(r, 0);
2192
2193 r = amdgpu_va_range_alloc(device_handle,
2194 amdgpu_gpu_va_range_general,
2195 BUFFER_SIZE, 1, 0, &bo_mc,
2196 &va_handle, 0);
2197 CU_ASSERT_EQUAL(r, 0);
2198
2199 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2200 CU_ASSERT_EQUAL(r, 0);
2201
2202 handle = buf_handle;
2203
2204 j = i = 0;
2205
2206 if (family_id == AMDGPU_FAMILY_SI)
2207 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2208 sdma_write_length);
2209 else
2210 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2211 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2212 pm4[i++] = 0xffffffff & bo_mc;
2213 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2214 if (family_id >= AMDGPU_FAMILY_AI)
2215 pm4[i++] = sdma_write_length - 1;
2216 else if (family_id != AMDGPU_FAMILY_SI)
2217 pm4[i++] = sdma_write_length;
2218
2219 while (j++ < sdma_write_length)
2220 pm4[i++] = 0xdeadbeaf;
2221
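/* Fork a child that scribbles on the command buffer and exits:
 * copy-on-write must give the child its own page, so neither the
 * parent's command stream nor the GPU's view of the userptr BO may
 * be disturbed.
 */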
2222 if (!fork()) {
2223 pm4[0] = 0x0;
2224 exit(0);
2225 }
2226
2227 amdgpu_test_exec_cs_helper(context_handle,
2228 AMDGPU_HW_IP_DMA, 0,
2229 i, pm4,
2230 1, &handle,
2231 ib_info, ibs_request);
2232 i = 0;
2233 while (i < sdma_write_length) {
2234 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2235 }
2236 free(ibs_request);
2237 free(ib_info);
2238 free(pm4);
2239
2240 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2241 CU_ASSERT_EQUAL(r, 0);
2242 r = amdgpu_va_range_free(va_handle);
2243 CU_ASSERT_EQUAL(r, 0);
2244 r = amdgpu_bo_free(buf_handle);
2245 CU_ASSERT_EQUAL(r, 0);
2246 free(ptr);
2247
2248 r = amdgpu_cs_ctx_free(context_handle);
2249 CU_ASSERT_EQUAL(r, 0);
2250
2251 wait(NULL);
2252 }
2253
2254 static void amdgpu_sync_dependency_test(void)
2255 {
2256 amdgpu_context_handle context_handle[2];
2257 amdgpu_bo_handle ib_result_handle;
2258 void *ib_result_cpu;
2259 uint64_t ib_result_mc_address;
2260 struct amdgpu_cs_request ibs_request;
2261 struct amdgpu_cs_ib_info ib_info;
2262 struct amdgpu_cs_fence fence_status;
2263 uint32_t expired;
2264 int i, j, r;
2265 amdgpu_bo_list_handle bo_list;
2266 amdgpu_va_handle va_handle;
2267 static uint32_t *ptr;
2268 uint64_t seq_no;
2269
2270 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2271 CU_ASSERT_EQUAL(r, 0);
2272 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2273 CU_ASSERT_EQUAL(r, 0);
2274
2275 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2276 AMDGPU_GEM_DOMAIN_GTT, 0,
2277 &ib_result_handle, &ib_result_cpu,
2278 &ib_result_mc_address, &va_handle);
2279 CU_ASSERT_EQUAL(r, 0);
2280
2281 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2282 &bo_list);
2283 CU_ASSERT_EQUAL(r, 0);
2284
2285 ptr = ib_result_cpu;
2286 i = 0;
2287
2288 memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
2289
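/* Test plan: context[1] dispatches a compute shader that stores to
 * DATA_OFFSET; context[0] then submits a WRITE_DATA of 99 to the
 * same location with a fence dependency on the first submission.
 * If the kernel honors the dependency, the 99 always lands last.
 */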
2290 /* Dispatch minimal init config and verify it's executed */
2291 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2292 ptr[i++] = 0x80000000;
2293 ptr[i++] = 0x80000000;
2294
2295 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2296 ptr[i++] = 0x80000000;
2297
2298
2299 /* Program compute regs */
2300 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2301 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2302 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2303 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
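/* COMPUTE_PGM_LO/HI take the shader address in 256-byte units:
 * VA >> 8 and VA >> 40.
 */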
2304
2305
2306 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2307 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2308 /*
2309 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2310 SGPRS = 1
2311 PRIORITY = 0
2312 FLOAT_MODE = 192 (0xc0)
2313 PRIV = 0
2314 DX10_CLAMP = 1
2315 DEBUG_MODE = 0
2316 IEEE_MODE = 0
2317 BULKY = 0
2318 CDBG_USER = 0
2319 *
2320 */
2321 ptr[i++] = 0x002c0040;
2322
2323
2324 /*
2325 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2326 USER_SGPR = 8
2327 TRAP_PRESENT = 0
2328 TGID_X_EN = 0
2329 TGID_Y_EN = 0
2330 TGID_Z_EN = 0
2331 TG_SIZE_EN = 0
2332 TIDIG_COMP_CNT = 0
2333 EXCP_EN_MSB = 0
2334 LDS_SIZE = 0
2335 EXCP_EN = 0
2336 *
2337 */
2338 ptr[i++] = 0x00000010;
2339
2340
2341 /*
2342 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2343 WAVESIZE = 0
2344 *
2345 */
2346 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2347 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2348 ptr[i++] = 0x00000100;
2349
2350 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2351 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2352 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2353 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2354
2355 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2356 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2357 ptr[i++] = 0;
2358
2359 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2360 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2361 ptr[i++] = 1;
2362 ptr[i++] = 1;
2363 ptr[i++] = 1;
2364
2365
2366 /* Dispatch */
2367 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2368 ptr[i++] = 1;
2369 ptr[i++] = 1;
2370 ptr[i++] = 1;
2371 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2372
2373
2374 while (i & 7)
2375 ptr[i++] = 0xffff1000; /* type3 nop packet */
2376
2377 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2378 ib_info.ib_mc_address = ib_result_mc_address;
2379 ib_info.size = i;
2380
2381 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2382 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2383 ibs_request.ring = 0;
2384 ibs_request.number_of_ibs = 1;
2385 ibs_request.ibs = &ib_info;
2386 ibs_request.resources = bo_list;
2387 ibs_request.fence_info.handle = NULL;
2388
2389 r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2390 CU_ASSERT_EQUAL(r, 0);
2391 seq_no = ibs_request.seq_no;
2392
2393
2394
2395 /* Prepare second command with dependency on the first */
2396 j = i;
2397 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2398 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2399 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2400 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2401 ptr[i++] = 99;
2402
2403 while (i & 7)
2404 ptr[i++] = 0xffff1000; /* type3 nop packet */
2405
2406 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2407 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2408 ib_info.size = i - j;
2409
2410 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2411 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2412 ibs_request.ring = 0;
2413 ibs_request.number_of_ibs = 1;
2414 ibs_request.ibs = &ib_info;
2415 ibs_request.resources = bo_list;
2416 ibs_request.fence_info.handle = NULL;
2417
2418 ibs_request.number_of_dependencies = 1;
2419
2420 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2421 ibs_request.dependencies[0].context = context_handle[1];
2422 ibs_request.dependencies[0].ip_instance = 0;
2423 ibs_request.dependencies[0].ring = 0;
2424 ibs_request.dependencies[0].fence = seq_no;
2425
2426
2427 r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2428 CU_ASSERT_EQUAL(r, 0);
2429
2430
2431 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2432 fence_status.context = context_handle[0];
2433 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2434 fence_status.ip_instance = 0;
2435 fence_status.ring = 0;
2436 fence_status.fence = ibs_request.seq_no;
2437
2438 r = amdgpu_cs_query_fence_status(&fence_status,
2439 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2440 CU_ASSERT_EQUAL(r, 0);
2441
2442 /* Expect the second command to wait for shader to complete */
2443 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2444
2445 r = amdgpu_bo_list_destroy(bo_list);
2446 CU_ASSERT_EQUAL(r, 0);
2447
2448 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2449 ib_result_mc_address, 4096);
2450 CU_ASSERT_EQUAL(r, 0);
2451
2452 r = amdgpu_cs_ctx_free(context_handle[0]);
2453 CU_ASSERT_EQUAL(r, 0);
2454 r = amdgpu_cs_ctx_free(context_handle[1]);
2455 CU_ASSERT_EQUAL(r, 0);
2456
2457 free(ibs_request.dependencies);
2458 }
2459
2460 static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2461 {
2462 struct amdgpu_test_shader *shader;
2463 int i, loop = 0x10000;
2464
2465 switch (family) {
2466 case AMDGPU_FAMILY_AI:
2467 shader = &memcpy_cs_hang_slow_ai;
2468 break;
2469 case AMDGPU_FAMILY_RV:
2470 shader = &memcpy_cs_hang_slow_rv;
2471 break;
2472 case AMDGPU_FAMILY_NV:
2473 shader = &memcpy_cs_hang_slow_nv;
2474 break;
2475 default:
2476 return -1;
2477 break;
2478 }
2479
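/* Build an intentionally huge shader: one header, the body repeated
 * 'loop' (0x10000) times, then the footer, so the dispatch runs long
 * enough to trip the job timeout.
 */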
2480 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2481
2482 for (i = 0; i < loop; i++)
2483 memcpy(ptr + shader->header_length + shader->body_length * i,
2484 shader->shader + shader->header_length,
2485 shader->body_length * sizeof(uint32_t));
2486
2487 memcpy(ptr + shader->header_length + shader->body_length * loop,
2488 shader->shader + shader->header_length + shader->body_length,
2489 shader->foot_length * sizeof(uint32_t));
2490
2491 return 0;
2492 }
2493
2494 static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2495 int cs_type,
2496 uint32_t version)
2497 {
2498 uint32_t shader_size;
2499 const uint32_t *shader;
2500
2501 switch (cs_type) {
2502 case CS_BUFFERCLEAR:
2503 if (version == 9) {
2504 shader = bufferclear_cs_shader_gfx9;
2505 shader_size = sizeof(bufferclear_cs_shader_gfx9);
2506 } else if (version == 10) {
2507 shader = bufferclear_cs_shader_gfx10;
2508 shader_size = sizeof(bufferclear_cs_shader_gfx10);
2509 }
2510 break;
2511 case CS_BUFFERCOPY:
2512 if (version == 9) {
2513 shader = buffercopy_cs_shader_gfx9;
2514 shader_size = sizeof(buffercopy_cs_shader_gfx9);
2515 } else if (version == 10) {
2516 shader = buffercopy_cs_shader_gfx10;
2517 shader_size = sizeof(buffercopy_cs_shader_gfx10);
2518 }
2519 break;
2520 case CS_HANG:
2521 shader = memcpy_ps_hang;
2522 shader_size = sizeof(memcpy_ps_hang);
2523 break;
2524 default:
2525 return -1;
2526 break;
2527 }
2528
2529 memcpy(ptr, shader, shader_size);
2530 return 0;
2531 }
2532
2533 static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version)
2534 {
2535 int i = 0;
2536
2537 /* Write context control and load shadowing register if necessary */
2538 if (ip_type == AMDGPU_HW_IP_GFX) {
2539 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2540 ptr[i++] = 0x80000000;
2541 ptr[i++] = 0x80000000;
2542 }
2543
2544 /* Issue commands to set default compute state. */
2545 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2546 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2547 ptr[i++] = 0x204;
2548 i += 3;
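/* the three skipped dwords remain zero because callers pre-clear
 * the command buffer, which is what zeroes the START registers */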
2549
2550 /* clear mmCOMPUTE_TMPRING_SIZE */
2551 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2552 ptr[i++] = 0x218;
2553 ptr[i++] = 0;
2554
2555 /* Set new sh registers in GFX10 to 0 */
2556 if (version == 10) {
2557 /* mmCOMPUTE_SHADER_CHKSUM */
2558 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2559 ptr[i++] = 0x22a;
2560 ptr[i++] = 0;
2561 /* mmCOMPUTE_REQ_CTRL */
2562 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6);
2563 ptr[i++] = 0x222;
2564 i += 6;
2565 /* mmCP_COHER_START_DELAY */
2566 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2567 ptr[i++] = 0x7b;
2568 ptr[i++] = 0x20;
2569 }
2570 return i;
2571 }
2572
2573 static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version)
2574 {
2575 int i = 0;
2576
2577 /* Issue commands to set cu mask used in current dispatch */
2578 if (version == 9) {
2579 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2580 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2581 ptr[i++] = 0x216;
2582 ptr[i++] = 0xffffffff;
2583 ptr[i++] = 0xffffffff;
2584 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2585 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2586 ptr[i++] = 0x219;
2587 ptr[i++] = 0xffffffff;
2588 ptr[i++] = 0xffffffff;
2589 } else if (version == 10) {
2590 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2591 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2592 ptr[i++] = 0x30000216;
2593 ptr[i++] = 0xffffffff;
2594 ptr[i++] = 0xffffffff;
2595 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2596 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2597 ptr[i++] = 0x30000219;
2598 ptr[i++] = 0xffffffff;
2599 ptr[i++] = 0xffffffff;
2600 }
2601
2602 return i;
2603 }
2604
2605 static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version)
2606 {
2607 int i, j;
2608
2609 i = 0;
2610
2611 /* Writes shader state to HW */
2612 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2613 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2614 ptr[i++] = 0x20c;
2615 ptr[i++] = (shader_addr >> 8);
2616 ptr[i++] = (shader_addr >> 40);
2617 /* write sh regs*/
2618 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2619 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2620 /* - Gfx9ShRegBase */
2621 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2622 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2623 }
2624
2625 if (version == 10) {
2626 /* mmCOMPUTE_PGM_RSRC3 */
2627 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2628 ptr[i++] = 0x228;
2629 ptr[i++] = 0;
2630 }
2631
2632 return i;
2633 }
2634
2635 static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2636 uint32_t ip_type,
2637 uint32_t ring,
2638 uint32_t version)
2639 {
2640 amdgpu_context_handle context_handle;
2641 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2642 volatile unsigned char *ptr_dst;
2643 void *ptr_shader;
2644 uint32_t *ptr_cmd;
2645 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2646 amdgpu_va_handle va_dst, va_shader, va_cmd;
2647 int i, r;
2648 int bo_dst_size = 16384;
2649 int bo_shader_size = 4096;
2650 int bo_cmd_size = 4096;
2651 struct amdgpu_cs_request ibs_request = {0};
2652 struct amdgpu_cs_ib_info ib_info = {0};
2653 amdgpu_bo_list_handle bo_list;
2654 struct amdgpu_cs_fence fence_status = {0};
2655 uint32_t expired;
2656
2657 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2658 CU_ASSERT_EQUAL(r, 0);
2659
2660 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2661 AMDGPU_GEM_DOMAIN_GTT, 0,
2662 &bo_cmd, (void **)&ptr_cmd,
2663 &mc_address_cmd, &va_cmd);
2664 CU_ASSERT_EQUAL(r, 0);
2665 memset(ptr_cmd, 0, bo_cmd_size);
2666
2667 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2668 AMDGPU_GEM_DOMAIN_VRAM, 0,
2669 &bo_shader, &ptr_shader,
2670 &mc_address_shader, &va_shader);
2671 CU_ASSERT_EQUAL(r, 0);
2672 memset(ptr_shader, 0, bo_shader_size);
2673
2674 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version);
2675 CU_ASSERT_EQUAL(r, 0);
2676
2677 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2678 AMDGPU_GEM_DOMAIN_VRAM, 0,
2679 &bo_dst, (void **)&ptr_dst,
2680 &mc_address_dst, &va_dst);
2681 CU_ASSERT_EQUAL(r, 0);
2682
2683 i = 0;
2684 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2685
2686 /* Issue commands to set cu mask used in current dispatch */
2687 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2688
2689 /* Writes shader state to HW */
2690 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2691
2692 /* Write constant data */
2693 /* Writes the UAV constant data to the SGPRs. */
2694 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2695 ptr_cmd[i++] = 0x240;
2696 ptr_cmd[i++] = mc_address_dst;
2697 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2698 ptr_cmd[i++] = 0x400;
2699 if (version == 9)
2700 ptr_cmd[i++] = 0x74fac;
2701 else if (version == 10)
2702 ptr_cmd[i++] = 0x1104bfac;
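/* The four dwords above form the buffer resource descriptor (V#)
 * the shader consumes: base address low/high (0x100000 sets stride
 * bits in the high half), num_records, and a format dword that
 * differs between gfx9 and gfx10.
 */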
2703
2704 /* Sets a range of pixel shader constants */
2705 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2706 ptr_cmd[i++] = 0x244;
2707 ptr_cmd[i++] = 0x22222222;
2708 ptr_cmd[i++] = 0x22222222;
2709 ptr_cmd[i++] = 0x22222222;
2710 ptr_cmd[i++] = 0x22222222;
2711
2712 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2713 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2714 ptr_cmd[i++] = 0x215;
2715 ptr_cmd[i++] = 0;
2716
2717 /* dispatch direct command */
2718 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2719 ptr_cmd[i++] = 0x10;
2720 ptr_cmd[i++] = 1;
2721 ptr_cmd[i++] = 1;
2722 ptr_cmd[i++] = 1;
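/* DISPATCH_DIRECT: launch 0x10 x 1 x 1 thread groups; the last
 * dword is the dispatch initiator with COMPUTE_SHADER_EN set.
 */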
2723
2724 while (i & 7)
2725 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2726
2727 resources[0] = bo_dst;
2728 resources[1] = bo_shader;
2729 resources[2] = bo_cmd;
2730 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2731 CU_ASSERT_EQUAL(r, 0);
2732
2733 ib_info.ib_mc_address = mc_address_cmd;
2734 ib_info.size = i;
2735 ibs_request.ip_type = ip_type;
2736 ibs_request.ring = ring;
2737 ibs_request.resources = bo_list;
2738 ibs_request.number_of_ibs = 1;
2739 ibs_request.ibs = &ib_info;
2740 ibs_request.fence_info.handle = NULL;
2741
2742 /* submit CS */
2743 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2744 CU_ASSERT_EQUAL(r, 0);
2745
2746 r = amdgpu_bo_list_destroy(bo_list);
2747 CU_ASSERT_EQUAL(r, 0);
2748
2749 fence_status.ip_type = ip_type;
2750 fence_status.ip_instance = 0;
2751 fence_status.ring = ring;
2752 fence_status.context = context_handle;
2753 fence_status.fence = ibs_request.seq_no;
2754
2755 /* wait for the IB to complete */
2756 r = amdgpu_cs_query_fence_status(&fence_status,
2757 AMDGPU_TIMEOUT_INFINITE,
2758 0, &expired);
2759 CU_ASSERT_EQUAL(r, 0);
2760 CU_ASSERT_EQUAL(expired, true);
2761
2762 /* verify the memset result matches the expected pattern */
2763 i = 0;
2764 while(i < bo_dst_size) {
2765 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2766 }
2767
2768 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2769 CU_ASSERT_EQUAL(r, 0);
2770
2771 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2772 CU_ASSERT_EQUAL(r, 0);
2773
2774 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2775 CU_ASSERT_EQUAL(r, 0);
2776
2777 r = amdgpu_cs_ctx_free(context_handle);
2778 CU_ASSERT_EQUAL(r, 0);
2779 }
2780
2781 static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2782 uint32_t ip_type,
2783 uint32_t ring,
2784 uint32_t version,
2785 int hang)
2786 {
2787 amdgpu_context_handle context_handle;
2788 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2789 volatile unsigned char *ptr_dst;
2790 void *ptr_shader;
2791 unsigned char *ptr_src;
2792 uint32_t *ptr_cmd;
2793 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2794 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2795 int i, r;
2796 int bo_dst_size = 16384;
2797 int bo_shader_size = 4096;
2798 int bo_cmd_size = 4096;
2799 struct amdgpu_cs_request ibs_request = {0};
2800 struct amdgpu_cs_ib_info ib_info = {0};
2801 uint32_t expired, hang_state, hangs;
2802 enum cs_type cs_type;
2803 amdgpu_bo_list_handle bo_list;
2804 struct amdgpu_cs_fence fence_status = {0};
2805
2806 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2807 CU_ASSERT_EQUAL(r, 0);
2808
2809 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2810 AMDGPU_GEM_DOMAIN_GTT, 0,
2811 &bo_cmd, (void **)&ptr_cmd,
2812 &mc_address_cmd, &va_cmd);
2813 CU_ASSERT_EQUAL(r, 0);
2814 memset(ptr_cmd, 0, bo_cmd_size);
2815
2816 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2817 AMDGPU_GEM_DOMAIN_VRAM, 0,
2818 &bo_shader, &ptr_shader,
2819 &mc_address_shader, &va_shader);
2820 CU_ASSERT_EQUAL(r, 0);
2821 memset(ptr_shader, 0, bo_shader_size);
2822
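/* In hang mode a never-terminating shader is loaded instead of the
 * memcpy shader; the submission is then expected to trigger a GPU
 * reset, verified below via amdgpu_cs_query_reset_state().
 */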
2823 cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2824 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
2825 CU_ASSERT_EQUAL(r, 0);
2826
2827 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2828 AMDGPU_GEM_DOMAIN_VRAM, 0,
2829 &bo_src, (void **)&ptr_src,
2830 &mc_address_src, &va_src);
2831 CU_ASSERT_EQUAL(r, 0);
2832
2833 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2834 AMDGPU_GEM_DOMAIN_VRAM, 0,
2835 &bo_dst, (void **)&ptr_dst,
2836 &mc_address_dst, &va_dst);
2837 CU_ASSERT_EQUAL(r, 0);
2838
2839 memset(ptr_src, 0x55, bo_dst_size);
2840
2841 i = 0;
2842 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2843
2844 /* Issue commands to set cu mask used in current dispatch */
2845 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2846
2847 /* Writes shader state to HW */
2848 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2849
2850 /* Write constant data */
2851 /* Writes the texture resource constants data to the SGPRs */
2852 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2853 ptr_cmd[i++] = 0x240;
2854 ptr_cmd[i++] = mc_address_src;
2855 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2856 ptr_cmd[i++] = 0x400;
2857 if (version == 9)
2858 ptr_cmd[i++] = 0x74fac;
2859 else if (version == 10)
2860 ptr_cmd[i++] = 0x1104bfac;
2861
2862 /* Writes the UAV constant data to the SGPRs. */
2863 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2864 ptr_cmd[i++] = 0x244;
2865 ptr_cmd[i++] = mc_address_dst;
2866 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2867 ptr_cmd[i++] = 0x400;
2868 if (version == 9)
2869 ptr_cmd[i++] = 0x74fac;
2870 else if (version == 10)
2871 ptr_cmd[i++] = 0x1104bfac;
2872
2873 /* clear mmCOMPUTE_RESOURCE_LIMITS */
2874 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2875 ptr_cmd[i++] = 0x215;
2876 ptr_cmd[i++] = 0;
2877
2878 /* dispatch direct command */
2879 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2880 ptr_cmd[i++] = 0x10;
2881 ptr_cmd[i++] = 1;
2882 ptr_cmd[i++] = 1;
2883 ptr_cmd[i++] = 1;
2884
2885 while (i & 7)
2886 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2887
2888 resources[0] = bo_shader;
2889 resources[1] = bo_src;
2890 resources[2] = bo_dst;
2891 resources[3] = bo_cmd;
2892 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2893 CU_ASSERT_EQUAL(r, 0);
2894
2895 ib_info.ib_mc_address = mc_address_cmd;
2896 ib_info.size = i;
2897 ibs_request.ip_type = ip_type;
2898 ibs_request.ring = ring;
2899 ibs_request.resources = bo_list;
2900 ibs_request.number_of_ibs = 1;
2901 ibs_request.ibs = &ib_info;
2902 ibs_request.fence_info.handle = NULL;
2903 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2904 CU_ASSERT_EQUAL(r, 0);
2905
2906 fence_status.ip_type = ip_type;
2907 fence_status.ip_instance = 0;
2908 fence_status.ring = ring;
2909 fence_status.context = context_handle;
2910 fence_status.fence = ibs_request.seq_no;
2911
2912 /* wait for the IB to complete */
2913 r = amdgpu_cs_query_fence_status(&fence_status,
2914 AMDGPU_TIMEOUT_INFINITE,
2915 0, &expired);
2916
2917 if (!hang) {
2918 CU_ASSERT_EQUAL(r, 0);
2919 CU_ASSERT_EQUAL(expired, true);
2920
2921 /* verify the memcpy result matches the source buffer */
2922 i = 0;
2923 while(i < bo_dst_size) {
2924 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2925 i++;
2926 }
2927 } else {
2928 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2929 CU_ASSERT_EQUAL(r, 0);
2930 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2931 }
2932
2933 r = amdgpu_bo_list_destroy(bo_list);
2934 CU_ASSERT_EQUAL(r, 0);
2935
2936 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2937 CU_ASSERT_EQUAL(r, 0);
2938 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2939 CU_ASSERT_EQUAL(r, 0);
2940
2941 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2942 CU_ASSERT_EQUAL(r, 0);
2943
2944 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2945 CU_ASSERT_EQUAL(r, 0);
2946
2947 r = amdgpu_cs_ctx_free(context_handle);
2948 CU_ASSERT_EQUAL(r, 0);
2949 }
2950
2951 static void amdgpu_compute_dispatch_test(void)
2952 {
2953 int r;
2954 struct drm_amdgpu_info_hw_ip info;
2955 uint32_t ring_id, version;
2956
2957 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2958 CU_ASSERT_EQUAL(r, 0);
2959 if (!info.available_rings)
2960 printf("SKIP ... as there's no compute ring\n");
2961
2962 version = info.hw_ip_version_major;
2963 if (version != 9 && version != 10) {
2964 printf("SKIP ... unsupported gfx version %d\n", version);
2965 return;
2966 }
2967
2968 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2969 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version);
2970 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0);
2971 }
2972 }
2973
2974 static void amdgpu_gfx_dispatch_test(void)
2975 {
2976 int r;
2977 struct drm_amdgpu_info_hw_ip info;
2978 uint32_t ring_id, version;
2979
2980 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2981 CU_ASSERT_EQUAL(r, 0);
2982 if (!info.available_rings)
2983 printf("SKIP ... as there's no graphics ring\n");
2984
2985 version = info.hw_ip_version_major;
2986 if (version != 9 && version != 10) {
2987 printf("SKIP ... unsupported gfx version %d\n", version);
2988 return;
2989 }
2990
2991 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2992 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version);
2993 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0);
2994 }
2995 }
2996
2997 void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2998 {
2999 int r;
3000 struct drm_amdgpu_info_hw_ip info;
3001 uint32_t ring_id, version;
3002
3003 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
3004 CU_ASSERT_EQUAL(r, 0);
3005 if (!info.available_rings)
3006 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
3007
3008 version = info.hw_ip_version_major;
3009 if (version != 9 && version != 10) {
3010 printf("SKIP ... unsupported gfx version %d\n", version);
3011 return;
3012 }
3013
3014 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3015 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3016 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1);
3017 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3018 }
3019 }
3020
3021 static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
3022 uint32_t ip_type, uint32_t ring, int version)
3023 {
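/* Slow-hang variant: a multi-megabyte compute shader combined with
 * a 0x10000-group dispatch keeps the ring busy past the job
 * timeout; the context is then expected to report
 * AMDGPU_CTX_UNKNOWN_RESET.
 */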
3024 amdgpu_context_handle context_handle;
3025 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
3026 volatile unsigned char *ptr_dst;
3027 void *ptr_shader;
3028 unsigned char *ptr_src;
3029 uint32_t *ptr_cmd;
3030 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
3031 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
3032 int i, r;
3033 int bo_dst_size = 0x4000000;
3034 int bo_shader_size = 0x400000;
3035 int bo_cmd_size = 4096;
3036 struct amdgpu_cs_request ibs_request = {0};
3037 struct amdgpu_cs_ib_info ib_info = {0};
3038 uint32_t hang_state, hangs, expired;
3039 struct amdgpu_gpu_info gpu_info = {0};
3040 amdgpu_bo_list_handle bo_list;
3041 struct amdgpu_cs_fence fence_status = {0};
3042
3043 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3044 CU_ASSERT_EQUAL(r, 0);
3045
3046 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3047 CU_ASSERT_EQUAL(r, 0);
3048
3049 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3050 AMDGPU_GEM_DOMAIN_GTT, 0,
3051 &bo_cmd, (void **)&ptr_cmd,
3052 &mc_address_cmd, &va_cmd);
3053 CU_ASSERT_EQUAL(r, 0);
3054 memset(ptr_cmd, 0, bo_cmd_size);
3055
3056 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3057 AMDGPU_GEM_DOMAIN_VRAM, 0,
3058 &bo_shader, &ptr_shader,
3059 &mc_address_shader, &va_shader);
3060 CU_ASSERT_EQUAL(r, 0);
3061 memset(ptr_shader, 0, bo_shader_size);
3062
3063 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
3064 CU_ASSERT_EQUAL(r, 0);
3065
3066 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3067 AMDGPU_GEM_DOMAIN_VRAM, 0,
3068 &bo_src, (void **)&ptr_src,
3069 &mc_address_src, &va_src);
3070 CU_ASSERT_EQUAL(r, 0);
3071
3072 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3073 AMDGPU_GEM_DOMAIN_VRAM, 0,
3074 &bo_dst, (void **)&ptr_dst,
3075 &mc_address_dst, &va_dst);
3076 CU_ASSERT_EQUAL(r, 0);
3077
3078 memset(ptr_src, 0x55, bo_dst_size);
3079
3080 i = 0;
3081 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
3082
3083 /* Issue commands to set cu mask used in current dispatch */
3084 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
3085
3086 /* Writes shader state to HW */
3087 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
3088
3089 /* Write constant data */
3090 /* Writes the texture resource constants data to the SGPRs */
3091 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3092 ptr_cmd[i++] = 0x240;
3093 ptr_cmd[i++] = mc_address_src;
3094 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
3095 ptr_cmd[i++] = 0x400000;
3096 if (version == 9)
3097 ptr_cmd[i++] = 0x74fac;
3098 else if (version == 10)
3099 ptr_cmd[i++] = 0x1104bfac;
3100
3101 /* Writes the UAV constant data to the SGPRs. */
3102 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3103 ptr_cmd[i++] = 0x244;
3104 ptr_cmd[i++] = mc_address_dst;
3105 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
3106 ptr_cmd[i++] = 0x400000;
3107 if (version == 9)
3108 ptr_cmd[i++] = 0x74fac;
3109 else if (version == 10)
3110 ptr_cmd[i++] = 0x1104bfac;
3111
3112 /* clear mmCOMPUTE_RESOURCE_LIMITS */
3113 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
3114 ptr_cmd[i++] = 0x215;
3115 ptr_cmd[i++] = 0;
3116
3117 /* dispatch direct command */
3118 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
3119 ptr_cmd[i++] = 0x10000;
3120 ptr_cmd[i++] = 1;
3121 ptr_cmd[i++] = 1;
3122 ptr_cmd[i++] = 1;
3123
3124 while (i & 7)
3125 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3126
3127 resources[0] = bo_shader;
3128 resources[1] = bo_src;
3129 resources[2] = bo_dst;
3130 resources[3] = bo_cmd;
3131 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3132 CU_ASSERT_EQUAL(r, 0);
3133
3134 ib_info.ib_mc_address = mc_address_cmd;
3135 ib_info.size = i;
3136 ibs_request.ip_type = ip_type;
3137 ibs_request.ring = ring;
3138 ibs_request.resources = bo_list;
3139 ibs_request.number_of_ibs = 1;
3140 ibs_request.ibs = &ib_info;
3141 ibs_request.fence_info.handle = NULL;
3142 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3143 CU_ASSERT_EQUAL(r, 0);
3144
3145 fence_status.ip_type = ip_type;
3146 fence_status.ip_instance = 0;
3147 fence_status.ring = ring;
3148 fence_status.context = context_handle;
3149 fence_status.fence = ibs_request.seq_no;
3150
3151 /* wait for the IB to complete */
3152 r = amdgpu_cs_query_fence_status(&fence_status,
3153 AMDGPU_TIMEOUT_INFINITE,
3154 0, &expired);
3155
3156 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3157 CU_ASSERT_EQUAL(r, 0);
3158 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3159
3160 r = amdgpu_bo_list_destroy(bo_list);
3161 CU_ASSERT_EQUAL(r, 0);
3162
3163 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
3164 CU_ASSERT_EQUAL(r, 0);
3165 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3166 CU_ASSERT_EQUAL(r, 0);
3167
3168 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3169 CU_ASSERT_EQUAL(r, 0);
3170
3171 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
3172 CU_ASSERT_EQUAL(r, 0);
3173
3174 r = amdgpu_cs_ctx_free(context_handle);
3175 CU_ASSERT_EQUAL(r, 0);
3176 }
3177
3178 void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
3179 {
3180 int r;
3181 struct drm_amdgpu_info_hw_ip info;
3182 uint32_t ring_id, version;
3183
3184 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
3185 CU_ASSERT_EQUAL(r, 0);
3186 if (!info.available_rings)
3187 printf("SKIP ... as there's no ring for ip %d\n", ip_type);
3188
3189 version = info.hw_ip_version_major;
3190 if (version != 9 && version != 10) {
3191 printf("SKIP ... unsupported gfx version %d\n", version);
3192 return;
3193 }
3194
3195 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3196 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3197 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version);
3198 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3199 }
3200 }
3201
3202 static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
3203 {
3204 struct amdgpu_test_shader *shader;
3205 int i, loop = 0x40000;
3206
3207 switch (family) {
3208 case AMDGPU_FAMILY_AI:
3209 case AMDGPU_FAMILY_RV:
3210 shader = &memcpy_ps_hang_slow_ai;
3211 break;
3212 default:
3213 return -1;
3214 break;
3215 }
3216
3217 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
3218
3219 for (i = 0; i < loop; i++)
3220 memcpy(ptr + shader->header_length + shader->body_length * i,
3221 shader->shader + shader->header_length,
3222 shader->body_length * sizeof(uint32_t));
3223
3224 memcpy(ptr + shader->header_length + shader->body_length * loop,
3225 shader->shader + shader->header_length + shader->body_length,
3226 shader->foot_length * sizeof(uint32_t));
3227
3228 return 0;
3229 }
3230
3231 static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version)
3232 {
3233 int i;
3234 uint32_t shader_offset = 256;
3235 uint32_t mem_offset, patch_code_offset;
3236 uint32_t shader_size, patchinfo_code_size;
3237 const uint32_t *shader;
3238 const uint32_t *patchinfo_code;
3239 const uint32_t *patchcode_offset;
3240
3241 switch (ps_type) {
3242 case PS_CONST:
3243 if (version == 9) {
3244 shader = ps_const_shader_gfx9;
3245 shader_size = sizeof(ps_const_shader_gfx9);
3246 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3247 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3248 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3249 } else if (version == 10) {
3250 shader = ps_const_shader_gfx10;
3251 shader_size = sizeof(ps_const_shader_gfx10);
3252 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10;
3253 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10;
3254 patchcode_offset = ps_const_shader_patchinfo_offset_gfx10;
3255 }
3256 break;
3257 case PS_TEX:
3258 if (version == 9) {
3259 shader = ps_tex_shader_gfx9;
3260 shader_size = sizeof(ps_tex_shader_gfx9);
3261 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3262 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3263 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3264 } else if (version == 10) {
3265 shader = ps_tex_shader_gfx10;
3266 shader_size = sizeof(ps_tex_shader_gfx10);
3267 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10;
3268 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10;
3269 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10;
3270 }
3271 break;
3272 case PS_HANG:
3273 shader = memcpy_ps_hang;
3274 shader_size = sizeof(memcpy_ps_hang);
3275
3276 memcpy(ptr, shader, shader_size);
3277 return 0;
3278 default:
3279 return -1;
3280 break;
3281 }
3282
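/* Write ten copies of the pixel shader 256 bytes apart, then patch
 * each copy at its patch-info offset; the patch code appears to
 * select the per-variant export instructions (e.g. the color
 * export format).
 */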
3283 /* write main shader program */
3284 for (i = 0; i < 10; i++) {
3285 mem_offset = i * shader_offset;
3286 memcpy(ptr + mem_offset, shader, shader_size);
3287 }
3288
3289 /* overwrite patch codes */
3290 for (i = 0; i < 10; i++) {
3291 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3292 patch_code_offset = i * patchinfo_code_size;
3293 memcpy(ptr + mem_offset,
3294 patchinfo_code + patch_code_offset,
3295 patchinfo_code_size * sizeof(uint32_t));
3296 }
3297
3298 return 0;
3299 }
3300
3301 /* load RectPosTexFast_VS */
3302 static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version)
3303 {
3304 const uint32_t *shader;
3305 uint32_t shader_size;
3306
3307 if (version == 9) {
3308 shader = vs_RectPosTexFast_shader_gfx9;
3309 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3310 } else if (version == 10) {
3311 shader = vs_RectPosTexFast_shader_gfx10;
3312 shader_size = sizeof(vs_RectPosTexFast_shader_gfx10);
3313 }
3314
3315 memcpy(ptr, shader, shader_size);
3316
3317 return 0;
3318 }
3319
3320 static int amdgpu_draw_init(uint32_t *ptr, uint32_t version)
3321 {
3322 int i = 0;
3323 const uint32_t *preamblecache_ptr;
3324 uint32_t preamblecache_size;
3325
3326 /* Write context control and load shadowing register if necessary */
3327 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3328 ptr[i++] = 0x80000000;
3329 ptr[i++] = 0x80000000;
3330
3331 if (version == 9) {
3332 preamblecache_ptr = preamblecache_gfx9;
3333 preamblecache_size = sizeof(preamblecache_gfx9);
3334 } else if (version == 10) {
3335 preamblecache_ptr = preamblecache_gfx10;
3336 preamblecache_size = sizeof(preamblecache_gfx10);
3337 }
3338
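/* Append the canned register preamble for this gfx generation and
 * return the total number of dwords written.
 */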
3339 memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3340 return i + preamblecache_size/sizeof(uint32_t);
3341 }
3342
3343 static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3344 uint64_t dst_addr,
3345 uint32_t version,
3346 int hang_slow)
3347 {
3348 int i = 0;
3349
3350 /* setup color buffer */
3351 if (version == 9) {
3352 /* offset reg
3353 0xA318 CB_COLOR0_BASE
3354 0xA319 CB_COLOR0_BASE_EXT
3355 0xA31A CB_COLOR0_ATTRIB2
3356 0xA31B CB_COLOR0_VIEW
3357 0xA31C CB_COLOR0_INFO
3358 0xA31D CB_COLOR0_ATTRIB
3359 0xA31E CB_COLOR0_DCC_CONTROL
3360 0xA31F CB_COLOR0_CMASK
3361 0xA320 CB_COLOR0_CMASK_BASE_EXT
3362 0xA321 CB_COLOR0_FMASK
3363 0xA322 CB_COLOR0_FMASK_BASE_EXT
3364 0xA323 CB_COLOR0_CLEAR_WORD0
3365 0xA324 CB_COLOR0_CLEAR_WORD1
3366 0xA325 CB_COLOR0_DCC_BASE
3367 0xA326 CB_COLOR0_DCC_BASE_EXT */
3368 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3369 ptr[i++] = 0x318;
3370 ptr[i++] = dst_addr >> 8;
3371 ptr[i++] = dst_addr >> 40;
3372 ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
3373 ptr[i++] = 0;
3374 ptr[i++] = 0x50438;
3375 ptr[i++] = 0x10140000;
3376 i += 9;

		/* mmCB_MRT0_EPITCH */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x1e8;
		ptr[i++] = hang_slow ? 0xfff : 0x1f;
	} else if (version == 10) {
		/* 0xA318   CB_COLOR0_BASE
		   0xA319   CB_COLOR0_PITCH
		   0xA31A   CB_COLOR0_SLICE
		   0xA31B   CB_COLOR0_VIEW
		   0xA31C   CB_COLOR0_INFO
		   0xA31D   CB_COLOR0_ATTRIB
		   0xA31E   CB_COLOR0_DCC_CONTROL
		   0xA31F   CB_COLOR0_CMASK
		   0xA320   CB_COLOR0_CMASK_SLICE
		   0xA321   CB_COLOR0_FMASK
		   0xA322   CB_COLOR0_FMASK_SLICE
		   0xA323   CB_COLOR0_CLEAR_WORD0
		   0xA324   CB_COLOR0_CLEAR_WORD1
		   0xA325   CB_COLOR0_DCC_BASE */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14);
		ptr[i++] = 0x318;
		ptr[i++] = dst_addr >> 8;
		i += 3;
		ptr[i++] = 0x50438;
		i += 9;

		/* 0xA390   CB_COLOR0_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x390;
		ptr[i++] = dst_addr >> 40;

		/* 0xA398   CB_COLOR0_CMASK_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x398;
		ptr[i++] = 0;

		/* 0xA3A0   CB_COLOR0_FMASK_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3a0;
		ptr[i++] = 0;

		/* 0xA3A8   CB_COLOR0_DCC_BASE_EXT */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3a8;
		ptr[i++] = 0;

		/* 0xA3B0   CB_COLOR0_ATTRIB2 */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3b0;
		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;

		/* 0xA3B8   CB_COLOR0_ATTRIB3 */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x3b8;
		ptr[i++] = 0x9014000;
	}

	/* 0xA32B   CB_COLOR1_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x32b;
	ptr[i++] = 0;

	/* 0xA33A   CB_COLOR2_INFO */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x33a;
	ptr[i++] = 0;

	/* SPI_SHADER_COL_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c5;
	ptr[i++] = 9;
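	/*
	 * 9 is assumed to be the SPI_SHADER_32_ABGR export format for MRT0;
	 * it also matches the "256 * 9" shader-slot offset applied in
	 * amdgpu_draw_ps_write2hw().
	 */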

	/* setup depth buffer */
	if (version == 9) {
		/* mmDB_Z_INFO */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
		ptr[i++] = 0xe;
		i += 2;
	} else if (version == 10) {
		/* mmDB_Z_INFO */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
		ptr[i++] = 0x10;
		i += 2;
	}

	return i;
}

static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr,
						     uint32_t version,
						     int hang_slow)
{
	int i = 0;
	const uint32_t *cached_cmd_ptr;
	uint32_t cached_cmd_size;

	/* mmPA_SC_TILE_STEERING_OVERRIDE */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	ptr[i++] = 0;

	ptr[i++] = 0xffff1000;
	ptr[i++] = 0xc0021000;

	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0xd7;
	if (version == 9)
		ptr[i++] = 1;
	else if (version == 10)
		ptr[i++] = 0;

	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
	ptr[i++] = 0x2fe;
	i += 16;

	/* mmPA_SC_CENTROID_PRIORITY_0 */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
	ptr[i++] = 0x2f5;
	i += 2;

	if (version == 9) {
		cached_cmd_ptr = cached_cmd_gfx9;
		cached_cmd_size = sizeof(cached_cmd_gfx9);
	} else if (version == 10) {
		cached_cmd_ptr = cached_cmd_gfx10;
		cached_cmd_size = sizeof(cached_cmd_gfx10);
	}

	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
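	/*
	 * For the slow-hang tests, dword 12 of the cached packets is bumped
	 * below; 0x8000800 looks like a packed 2048x2048 dimension that
	 * greatly enlarges the draw area (an assumption based on usage, the
	 * exact register lives inside the opaque cached_cmd stream).
	 */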
	if (hang_slow)
		*(ptr + i + 12) = 0x8000800;
	i += cached_cmd_size / sizeof(uint32_t);

	if (version == 10) {
		/* mmCB_RMI_GL2_CACHE_CONTROL */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x104;
		ptr[i++] = 0x40aa0055;
		/* mmDB_RMI_L2_CACHE_CONTROL */
		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
		ptr[i++] = 0x1f;
		ptr[i++] = 0x2a0055;
	}

	return i;
}

static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
						  int ps_type,
						  uint64_t shader_addr,
						  uint32_t version,
						  int hang_slow)
{
	int i = 0;

	/* mmPA_CL_VS_OUT_CNTL */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x207;
	ptr[i++] = 0;

	if (version == 9) {
		/* mmSPI_SHADER_PGM_RSRC3_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = 0x46;
		ptr[i++] = 0xffff;
	} else if (version == 10) {
		/* mmSPI_SHADER_PGM_RSRC3_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000046;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_VS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000041;
		ptr[i++] = 0xffff;
	}

	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = 0x48;
	ptr[i++] = shader_addr >> 8;
	ptr[i++] = shader_addr >> 40;
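	/*
	 * PGM_LO/PGM_HI take the 256-byte-aligned shader VA split into a low
	 * dword (addr >> 8) and a high dword (addr >> 40).
	 */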

	/* mmSPI_SHADER_PGM_RSRC1_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4a;
	if (version == 9)
		ptr[i++] = 0xc0081;
	else if (version == 10)
		ptr[i++] = 0xc0041;
	/* mmSPI_SHADER_PGM_RSRC2_VS */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = 0x4b;
	ptr[i++] = 0x18;

	/* mmSPI_VS_OUT_CONFIG */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1b1;
	ptr[i++] = 2;

	/* mmSPI_SHADER_POS_FORMAT */
	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr[i++] = 0x1c3;
	ptr[i++] = 4;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x4c;
	i += 2;
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;	/* 2048.0f : 32.0f */
	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;	/* 2048.0f : 32.0f */

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x50;
	i += 2;
	if (ps_type == PS_CONST) {
		i += 2;
	} else if (ps_type == PS_TEX) {
		ptr[i++] = 0x3f800000;	/* 1.0f */
		ptr[i++] = 0x3f800000;	/* 1.0f */
	}

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr[i++] = 0x54;
	i += 4;

	return i;
}

static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
				   int ps_type,
				   uint64_t shader_addr,
				   uint32_t version)
{
	int i, j;
	const uint32_t *sh_registers;
	const uint32_t *context_registers;
	uint32_t num_sh_reg, num_context_reg;

	if (ps_type == PS_CONST) {
		if (version == 9) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
			num_sh_reg = ps_num_sh_registers_gfx9;
		} else if (version == 10) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
			num_sh_reg = ps_num_sh_registers_gfx10;
		}
		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	} else if (ps_type == PS_TEX) {
		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	}

	i = 0;

	if (version == 9) {
		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
		   0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		/* the multiplier 9 comes from SPI_SHADER_COL_FORMAT */
		shader_addr += 256 * 9;
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
		ptr[i++] = 0x7;
		ptr[i++] = 0xffff;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;
	} else if (version == 10) {
		shader_addr += 256 * 9;
		/* 0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
		ptr[i++] = 0x8;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;

		/* mmSPI_SHADER_PGM_RSRC3_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000007;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000001;
		ptr[i++] = 0xffff;
	}

	for (j = 0; j < num_sh_reg; j++) {
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = sh_registers[j * 2] - 0x2c00;
		ptr[i++] = sh_registers[j * 2 + 1];
	}

	for (j = 0; j < num_context_reg; j++) {
		/* 0xA1C5 (SPI_SHADER_COL_FORMAT) is already programmed in the surf_info helper */
		if (context_registers[j * 2] != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = context_registers[j * 2] - 0xa000;
			ptr[i++] = context_registers[j * 2 + 1];
		}

		/* 0xA1B4 appears to be SPI_PS_INPUT_ADDR; follow it with 0x1b3 (SPI_PS_INPUT_ENA) */
		if (context_registers[j * 2] == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	return i;
}

static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version)
{
	int i = 0;

	if (version == 9) {
		/* mmIA_MULTI_VGT_PARAM */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x40000258;
		ptr[i++] = 0xd00ff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x10000242;
		ptr[i++] = 0x11;
	} else if (version == 10) {
		/* mmGE_CNTL */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x25b;
		ptr[i++] = 0xff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x242;
		ptr[i++] = 0x11;
	}

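	/*
	 * DRAW_INDEX_AUTO: 3 vertices with draw initiator 2 (auto-generated
	 * indices); combined with primitive type 0x11 (rect list) above,
	 * this emits one full-surface rectangle.
	 */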
	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
	ptr[i++] = 3;
	ptr[i++] = 2;

	return i;
}

void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id, uint32_t version)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version);

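	/*
	 * Four user-data dwords at SH register offset 0xc carry the constant
	 * color read by the PS_CONST pixel shader; the 0x33 byte pattern is
	 * what the CPU-side check below expects in the destination.
	 */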
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify that the memset result matches the expected pattern */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring, int version)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs,
			   ring, version);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int version, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

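	/*
	 * The next eight user-data dwords at SH offset 0xc are assumed to
	 * form the T# image descriptor of the 0x55-filled source buffer:
	 * base address in the first two dwords, then size/format words.
	 */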
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x7c01f;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x3e000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x8007c007;
		ptr_cmd[i++] = 0x90500fac;
		i += 2;
		ptr_cmd[i++] = 0x400;
		i++;
	}

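	/*
	 * Four more dwords at SH offset 0x14, presumably the S# sampler
	 * descriptor paired with the texture above.
	 */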
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify that the memcpy result matches the source pattern */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int version, int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs,
			   ring, version, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id, version;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no graphics ring\n");
		return;
	}

	version = info.hw_ip_version_major;
	if (version != 9 && version != 10) {
		printf("SKIP ... unsupported gfx version %u\n", version);
		return;
	}

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id, version);
		amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0);
	}
}

void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
{
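	/*
	 * Slow-hang variant: a much larger pixel shader (0x400000 bytes) and
	 * 64 MiB source/destination surfaces make the draw run long enough
	 * to trip the kernel job timeout, so the context is expected to end
	 * up in the reset state checked at the bottom.
	 */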
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
						    mc_address_shader_vs, version, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

	/* same texture descriptor write as in amdgpu_memcpy_draw(), with
	 * enlarged size fields for the huge source surface */
	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);

	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x1ffcfff;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x1ffe000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x81ffc1ff;
		ptr_cmd[i++] = 0x90500fac;
		i += 4;
	}

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete; the hang is expected to end in a GPU reset */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

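	/*
	 * Reading the amdgpu_gpu_recover debugfs entry of the DRM minor
	 * backing drm_amdgpu[0] forces a GPU reset; the context created
	 * above must then report it via amdgpu_cs_query_reset_state().
	 */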
	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp) / sizeof(char));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* make sure the GPU is functional again after the reset */
	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}

static void amdgpu_stable_pstate_test(void)
{
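	/*
	 * Round-trip the stable-pstate context ioctl: query the default
	 * (expected NONE), switch to PEAK, then read the value back.
	 */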
	int r;
	amdgpu_context_handle context_handle;
	uint32_t current_pstate = 0, new_pstate = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &current_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &new_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}