/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);
CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (8 * 1024)
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					(((sub_op) & 0xFF) << 8) |	\
					(((op) & 0xFF) << 0))
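/* For example, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 * below evaluates to (0 << 16) | (0 << 8) | 1 == 0x00000001: the opcode
 * occupies bits [7:0] of the header, the sub-opcode bits [15:8] and the
 * extra field bits [31:16].
 */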
#define	SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define	SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
/* PM4 */
#define	PACKET_TYPE0	0
#define	PACKET_TYPE1	1
#define	PACKET_TYPE2	2
#define	PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define	PACKET2_PAD_SHIFT	0
#define	PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)
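/* A type-3 header thus carries the packet type in bits [31:30], the count
 * field in [29:16] (payload DWs minus one, per the usual PM4 convention)
 * and the opcode in [15:8]. For example, PACKET3(PACKET3_NOP, 14) ==
 * (3u << 30) | (14 << 16) | (0x10 << 8) == 0xc00e1000, a NOP followed by
 * 15 payload DWs; the raw constants 0xc0008400, 0xc0008600 and 0xc0008900
 * used by the CE/DE tests below decode the same way to type-3 opcodes
 * 0x84, 0x86 and 0x89, each with a single payload DW.
 */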

/* Packet 3 types */
#define	PACKET3_NOP			0x10

#define	PACKET3_WRITE_DATA		0x37
#define	WRITE_DATA_DST_SEL(x)		((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define	WR_ONE_ADDR			(1 << 16)
#define	WR_CONFIRM			(1 << 20)
#define	WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define	WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */
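/* A complete WRITE_DATA packet, as built by the write-linear helper below,
 * is PACKET3(PACKET3_WRITE_DATA, 2 + n), then WRITE_DATA_DST_SEL(5) |
 * WR_CONFIRM as the control DW, then the destination address low/high DWs,
 * then the n payload DWs to be written to memory.
 */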

#define	PACKET3_DMA_DATA		0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)		((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE		(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)		((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_DST_VOLATILE		(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)		((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_CP_SYNC		(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC			(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_SAS		(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_DAS		(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_SAIC		(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC		(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT		(1 << 30)
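/* The const-fill and copy helpers below emit the full seven-DW form of
 * this packet: the PACKET3(PACKET3_DMA_DATA, 5) header, a CONTROL DW
 * assembled from the ENGINE/DST_SEL/SRC_SEL/CP_SYNC fields above,
 * SRC_ADDR_LO (or the fill pattern when SRC_SEL is 2, i.e. DATA),
 * SRC_ADDR_HI, DST_ADDR_LO, DST_ADDR_HI and a final COMMAND DW holding
 * the byte count.
 */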

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						(((b) & 0x1) << 26) |	\
						(((t) & 0x1) << 23) |	\
						(((s) & 0x1) << 22) |	\
						(((cnt) & 0xFFFFF) << 0))
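/* For example, SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, count)
 * evaluates to (3 << 28) | count: on SI the opcode sits in bits [31:28]
 * of the header and the count in the low 20 bits.
 */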
#define	SDMA_OPCODE_COPY_SI		3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI			0xf
#define GFX_COMPUTE_NOP_SI		0x80000000
#define	PACKET3_DMA_DATA_SI		0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL			0x28
#define	CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define	CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define	CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define	PACKET3_SET_SH_REG_START		0x00002c00

#define	PACKET3_DISPATCH_DIRECT			0x15

/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07

#define SWAP_32(num)	(((num & 0xff000000) >> 24) |	\
			 ((num & 0x0000ff00) << 8) |	\
			 ((num & 0x00ff0000) >> 8) |	\
			 ((num & 0x000000ff) << 24))
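/* SWAP_32 reverses the byte order of a DW, e.g. SWAP_32(0x12345678) ==
 * 0x78563412; it is used to byte-swap the embedded shader binary below.
 */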

/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024
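/* The sync-dependency test packs everything into a single 8 KiB IB BO:
 * PM4 packets start at DW 0, the shader binary is copied to DW offset
 * CODE_OFFSET (mmCOMPUTE_PGM_LO is pointed at byte offset CODE_OFFSET * 4)
 * and the DW the shader writes lives at DW offset DATA_OFFSET, which is
 * what mmCOMPUTE_USER_DATA_0 is loaded with.
 */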

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER, placed 16 bytes into the same BO */
	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run four loops to cover all bo1/bo2 mapping-flag combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill the PM4 buffer: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify the SDMA copy: bo2 must now hold bo1's pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller creates and releases pm4_src, resources, ib_info and
 * ibs_request. This helper submits the command stream described in
 * ibs_request and waits for the resulting IB to complete.
 */
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packet into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill the PM4 buffer: test DMA write linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				pm4[i++] = 0xffffffff & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the write landed: the BO must hold the pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length * sizeof(uint32_t));
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_write_linear(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024 * 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length);

			resources[0] = bo;

			/* fill the PM4 buffer: test DMA const fill */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
								  0, 0, 0,
								  sdma_write_length / 4);
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
				} else {
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = 0xdeadbeaf;
					if (family_id >= AMDGPU_FAMILY_AI)
						pm4[i++] = sdma_write_length - 1;
					else
						pm4[i++] = sdma_write_length;
				}
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				if (family_id == AMDGPU_FAMILY_SI) {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
						   PACKET3_DMA_DATA_SI_CP_SYNC;
					pm4[i++] = 0xffffffff & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				} else {
					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
						   PACKET3_DMA_DATA_DST_SEL(0) |
						   PACKET3_DMA_DATA_SRC_SEL(2) |
						   PACKET3_DMA_DATA_CP_SYNC;
					pm4[i++] = 0xdeadbeaf;
					pm4[i++] = 0;
					pm4[i++] = 0xfffffffc & bo_mc;
					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
					pm4[i++] = sdma_write_length;
				}
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   ip_type, ring_id,
						   i, pm4,
						   1, resources,
						   ib_info, ibs_request);

			/* verify the fill: every DW must hold the pattern */
			i = 0;
			while (i < (sdma_write_length / 4)) {
				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
			}

			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(2, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop1 = loop2 = 0;
		/* run four loops to cover all bo1/bo2 mapping-flag combinations */
		while (loop1 < 2) {
			while (loop2 < 2) {
				/* allocate UC bo1 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop1], &bo1,
							    (void**)&bo1_cpu, &bo1_mc,
							    &bo1_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* set bo1 */
				memset((void*)bo1_cpu, 0xaa, sdma_write_length);

				/* allocate UC bo2 for sDMA use */
				r = amdgpu_bo_alloc_and_map(device_handle,
							    sdma_write_length, 4096,
							    AMDGPU_GEM_DOMAIN_GTT,
							    gtt_flags[loop2], &bo2,
							    (void**)&bo2_cpu, &bo2_mc,
							    &bo2_va_handle);
				CU_ASSERT_EQUAL(r, 0);

				/* clear bo2 */
				memset((void*)bo2_cpu, 0, sdma_write_length);

				resources[0] = bo1;
				resources[1] = bo2;

				/* fill the PM4 buffer: test DMA copy linear */
				i = j = 0;
				if (ip_type == AMDGPU_HW_IP_DMA) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
									  0, 0, 0,
									  sdma_write_length);
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
					} else {
						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
								       SDMA_COPY_SUB_OPCODE_LINEAR,
								       0);
						if (family_id >= AMDGPU_FAMILY_AI)
							pm4[i++] = sdma_write_length - 1;
						else
							pm4[i++] = sdma_write_length;
						pm4[i++] = 0;
						pm4[i++] = 0xffffffff & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xffffffff & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
					}
				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
					if (family_id == AMDGPU_FAMILY_SI) {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
							   PACKET3_DMA_DATA_SI_CP_SYNC |
							   (0xffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					} else {
						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
							   PACKET3_DMA_DATA_DST_SEL(0) |
							   PACKET3_DMA_DATA_SRC_SEL(0) |
							   PACKET3_DMA_DATA_CP_SYNC;
						pm4[i++] = 0xfffffffc & bo1_mc;
						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
						pm4[i++] = 0xfffffffc & bo2_mc;
						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
						pm4[i++] = sdma_write_length;
					}
				}

				amdgpu_test_exec_cs_helper(context_handle,
							   ip_type, ring_id,
							   i, pm4,
							   2, resources,
							   ib_info, ibs_request);

				/* verify the copy: bo2 must now hold bo1's pattern */
				i = 0;
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
				}
				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
							     sdma_write_length);
				CU_ASSERT_EQUAL(r, 0);
				loop2++;
			}
			loop2 = 0;
			loop1++;
		}
	}
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_sdma_copy_linear(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
}

static void amdgpu_command_submission_sdma(void)
{
	amdgpu_command_submission_sdma_write_linear();
	amdgpu_command_submission_sdma_const_fill();
	amdgpu_command_submission_sdma_copy_linear();
}

static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status[2] = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r;
	int i = 0, ib_cs_num = 2;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	for (i = 0; i < ib_cs_num; i++) {
		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
		ibs_request[i].number_of_ibs = 2;
		ibs_request[i].ibs = ib_info;
		ibs_request[i].resources = bo_list;
		ibs_request[i].fence_info.handle = NULL;
	}

	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; i < ib_cs_num; i++) {
		fence_status[i].context = context_handle;
		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
		fence_status[i].fence = ibs_request[i].seq_no;
	}

	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
				  AMDGPU_TIMEOUT_INFINITE,
				  &expired, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_multi_fence(void)
{
	amdgpu_command_submission_multi_fence_wait_all(true);
	amdgpu_command_submission_multi_fence_wait_all(false);
}

static void amdgpu_userptr_test(void)
{
	int i, r, j;
	uint32_t *pm4 = NULL;
	uint64_t bo_mc;
	void *ptr = NULL;
	int pm4_dw = 256;
	int sdma_write_length = 4;
	amdgpu_bo_handle handle;
	amdgpu_context_handle context_handle;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle va_handle;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_NOT_EQUAL(ptr, NULL);
	memset(ptr, 0, BUFFER_SIZE);

	r = amdgpu_create_bo_from_user_mem(device_handle,
					   ptr, BUFFER_SIZE, &buf_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_va_range_alloc(device_handle,
				  amdgpu_gpu_va_range_general,
				  BUFFER_SIZE, 1, 0, &bo_mc,
				  &va_handle, 0);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
	CU_ASSERT_EQUAL(r, 0);

	handle = buf_handle;

	j = i = 0;

	if (family_id == AMDGPU_FAMILY_SI)
		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
					  sdma_write_length);
	else
		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
				       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	pm4[i++] = 0xffffffff & bo_mc;
	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
	if (family_id >= AMDGPU_FAMILY_AI)
		pm4[i++] = sdma_write_length - 1;
	else if (family_id != AMDGPU_FAMILY_SI)
		pm4[i++] = sdma_write_length;

	while (j++ < sdma_write_length)
		pm4[i++] = 0xdeadbeaf;

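	/* Fork a child that dirties its copy-on-write copy of the PM4 buffer
	 * and exits immediately; the parent then submits the untouched
	 * original, presumably to exercise userptr handling when the address
	 * space is duplicated by fork(). The child is reaped by the wait()
	 * at the end of the test.
	 */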
	if (!fork()) {
		pm4[0] = 0x0;
		exit(0);
	}

	amdgpu_test_exec_cs_helper(context_handle,
				   AMDGPU_HW_IP_DMA, 0,
				   i, pm4,
				   1, &handle,
				   ib_info, ibs_request);
	i = 0;
	while (i < sdma_write_length) {
		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
	}
	free(ibs_request);
	free(ib_info);
	free(pm4);

	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_va_range_free(va_handle);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_free(buf_handle);
	CU_ASSERT_EQUAL(r, 0);
	free(ptr);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	wait(NULL);
}

static void amdgpu_sync_dependency_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, j, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	static uint32_t *ptr;
	uint64_t seq_no;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));

	/* Dispatch minimal init config and verify it's executed */
	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
	ptr[i++] = 0x80000000;
	ptr[i++] = 0x80000000;

	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
	ptr[i++] = 0x80000000;

	/* Program compute regs */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
	/*
	 * 002c0040	COMPUTE_PGM_RSRC1 <-	VGPRS = 0
	 *					SGPRS = 1
	 *					PRIORITY = 0
	 *					FLOAT_MODE = 192 (0xc0)
	 *					PRIV = 0
	 *					DX10_CLAMP = 1
	 *					DEBUG_MODE = 0
	 *					IEEE_MODE = 0
	 *					BULKY = 0
	 *					CDBG_USER = 0
	 */
	ptr[i++] = 0x002c0040;

	/*
	 * 00000010	COMPUTE_PGM_RSRC2 <-	SCRATCH_EN = 0
	 *					USER_SGPR = 8
	 *					TRAP_PRESENT = 0
	 *					TGID_X_EN = 0
	 *					TGID_Y_EN = 0
	 *					TGID_Z_EN = 0
	 *					TG_SIZE_EN = 0
	 *					TIDIG_COMP_CNT = 0
	 *					EXCP_EN_MSB = 0
	 *					LDS_SIZE = 0
	 *					EXCP_EN = 0
	 */
	ptr[i++] = 0x00000010;

	/*
	 * 00000100	COMPUTE_TMPRING_SIZE <-	WAVES = 256 (0x100)
	 *					WAVESIZE = 0
	 */
	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0x00000100;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
	ptr[i++] = 0;

	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;

	/* Dispatch */
	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 1;
	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */

	/* pad the IB to a multiple of 8 DWs */
	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = i;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);
	seq_no = ibs_request.seq_no;

	/* Prepare the second command, with a dependency on the first */
	j = i;
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
	ptr[i++] = 99;

	while (i & 7)
		ptr[i++] = 0xffff1000; /* type3 nop packet */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
	ib_info.size = i - j;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	ibs_request.number_of_dependencies = 1;

	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
	ibs_request.dependencies[0].context = context_handle[1];
	ibs_request.dependencies[0].ip_instance = 0;
	ibs_request.dependencies[0].ring = 0;
	ibs_request.dependencies[0].fence = seq_no;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle[0];
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	/* Expect the second command to have waited for the shader to complete */
	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 8192);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_ctx_free(context_handle[1]);
	CU_ASSERT_EQUAL(r, 0);

	free(ibs_request.dependencies);
}