1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
#include <pthread.h>
#include <unistd.h>

#include "CUnit/Basic.h"
#include "xf86drm.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
31
32 static amdgpu_device_handle device_handle;
33 static uint32_t major_version;
34 static uint32_t minor_version;
35
36 static uint32_t family_id;
37 static uint32_t chip_id;
38 static uint32_t chip_rev;
39
40 static void amdgpu_syncobj_timeline_test(void);
41
suite_syncobj_timeline_tests_enable(void)42 CU_BOOL suite_syncobj_timeline_tests_enable(void)
43 {
44 int r;
45 uint64_t cap = 0;
46
47 r = drmGetCap(drm_amdgpu[0], DRM_CAP_SYNCOBJ_TIMELINE, &cap);
48 if (r || cap == 0)
49 return CU_FALSE;
50
51 return CU_TRUE;
52 }
53
suite_syncobj_timeline_tests_init(void)54 int suite_syncobj_timeline_tests_init(void)
55 {
56 int r;
57
58 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
59 &minor_version, &device_handle);
60
61 if (r) {
62 if ((r == -EACCES) && (errno == EACCES))
63 printf("\n\nError:%s. "
64 "Hint:Try to run this test program as root.",
65 strerror(errno));
66 return CUE_SINIT_FAILED;
67 }
68
69 return CUE_SUCCESS;
70 }
71
suite_syncobj_timeline_tests_clean(void)72 int suite_syncobj_timeline_tests_clean(void)
73 {
74 int r = amdgpu_device_deinitialize(device_handle);
75
76 if (r == 0)
77 return CUE_SUCCESS;
78 else
79 return CUE_SCLEAN_FAILED;
80 }
81
82
/* Test registry for this suite; consumed by the CUnit test runner. */
CU_TestInfo syncobj_timeline_tests[] = {
	{ "syncobj timeline test", amdgpu_syncobj_timeline_test },
	CU_TEST_INFO_NULL,
};
87
88 #define GFX_COMPUTE_NOP 0xffff1000
89 #define SDMA_NOP 0x0
syncobj_command_submission_helper(uint32_t syncobj_handle,bool wait_or_signal,uint64_t point)90 static int syncobj_command_submission_helper(uint32_t syncobj_handle, bool
91 wait_or_signal, uint64_t point)
92 {
93 amdgpu_context_handle context_handle;
94 amdgpu_bo_handle ib_result_handle;
95 void *ib_result_cpu;
96 uint64_t ib_result_mc_address;
97 struct drm_amdgpu_cs_chunk chunks[2];
98 struct drm_amdgpu_cs_chunk_data chunk_data;
99 struct drm_amdgpu_cs_chunk_syncobj syncobj_data;
100 struct amdgpu_cs_fence fence_status;
101 amdgpu_bo_list_handle bo_list;
102 amdgpu_va_handle va_handle;
103 uint32_t expired;
104 int i, r;
105 uint64_t seq_no;
106 static uint32_t *ptr;
107 struct amdgpu_gpu_info gpu_info = {0};
108 unsigned gc_ip_type;
109
110 r = amdgpu_query_gpu_info(device_handle, &gpu_info);
111 CU_ASSERT_EQUAL(r, 0);
112
113 family_id = device_handle->info.family_id;
114 chip_id = device_handle->info.chip_external_rev;
115 chip_rev = device_handle->info.chip_rev;
116
117 gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
118 AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
119
120 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
121 CU_ASSERT_EQUAL(r, 0);
122
123 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
124 AMDGPU_GEM_DOMAIN_GTT, 0,
125 &ib_result_handle, &ib_result_cpu,
126 &ib_result_mc_address, &va_handle);
127 CU_ASSERT_EQUAL(r, 0);
128
129 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
130 &bo_list);
131 CU_ASSERT_EQUAL(r, 0);
132
133 ptr = ib_result_cpu;
134
135 for (i = 0; i < 16; ++i)
136 ptr[i] = wait_or_signal ? GFX_COMPUTE_NOP: SDMA_NOP;
137
138 chunks[0].chunk_id = AMDGPU_CHUNK_ID_IB;
139 chunks[0].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
140 chunks[0].chunk_data = (uint64_t)(uintptr_t)&chunk_data;
141 chunk_data.ib_data._pad = 0;
142 chunk_data.ib_data.va_start = ib_result_mc_address;
143 chunk_data.ib_data.ib_bytes = 16 * 4;
144 chunk_data.ib_data.ip_type = wait_or_signal ? gc_ip_type :
145 AMDGPU_HW_IP_DMA;
146 chunk_data.ib_data.ip_instance = 0;
147 chunk_data.ib_data.ring = 0;
148 chunk_data.ib_data.flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
149
150 chunks[1].chunk_id = wait_or_signal ?
151 AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT :
152 AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL;
153 chunks[1].length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4;
154 chunks[1].chunk_data = (uint64_t)(uintptr_t)&syncobj_data;
155 syncobj_data.handle = syncobj_handle;
156 syncobj_data.point = point;
157 syncobj_data.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
158
159 r = amdgpu_cs_submit_raw(device_handle,
160 context_handle,
161 bo_list,
162 2,
163 chunks,
164 &seq_no);
165 CU_ASSERT_EQUAL(r, 0);
166
167
168 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
169 fence_status.context = context_handle;
170 fence_status.ip_type = wait_or_signal ? gc_ip_type :
171 AMDGPU_HW_IP_DMA;
172 fence_status.ip_instance = 0;
173 fence_status.ring = 0;
174 fence_status.fence = seq_no;
175
176 r = amdgpu_cs_query_fence_status(&fence_status,
177 AMDGPU_TIMEOUT_INFINITE,0, &expired);
178 CU_ASSERT_EQUAL(r, 0);
179
180 r = amdgpu_bo_list_destroy(bo_list);
181 CU_ASSERT_EQUAL(r, 0);
182
183 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
184 ib_result_mc_address, 4096);
185 CU_ASSERT_EQUAL(r, 0);
186
187 r = amdgpu_cs_ctx_free(context_handle);
188 CU_ASSERT_EQUAL(r, 0);
189
190 return r;
191 }
192
/* Argument bundle handed to the wait/signal worker threads. */
struct syncobj_point {
	uint32_t syncobj_handle; /* timeline syncobj to operate on */
	uint64_t point;          /* timeline point to wait on / signal */
};
197
syncobj_wait(void * data)198 static void *syncobj_wait(void *data)
199 {
200 struct syncobj_point *sp = (struct syncobj_point *)data;
201 int r;
202
203 r = syncobj_command_submission_helper(sp->syncobj_handle, true,
204 sp->point);
205 CU_ASSERT_EQUAL(r, 0);
206
207 return (void *)(long)r;
208 }
209
syncobj_signal(void * data)210 static void *syncobj_signal(void *data)
211 {
212 struct syncobj_point *sp = (struct syncobj_point *)data;
213 int r;
214
215 r = syncobj_command_submission_helper(sp->syncobj_handle, false,
216 sp->point);
217 CU_ASSERT_EQUAL(r, 0);
218
219 return (void *)(long)r;
220 }
221
amdgpu_syncobj_timeline_test(void)222 static void amdgpu_syncobj_timeline_test(void)
223 {
224 static pthread_t wait_thread;
225 static pthread_t signal_thread;
226 static pthread_t c_thread;
227 struct syncobj_point sp1, sp2, sp3;
228 uint32_t syncobj_handle;
229 uint64_t payload;
230 uint64_t wait_point, signal_point;
231 uint64_t timeout;
232 struct timespec tp;
233 int r, sync_fd;
234 void *tmp;
235
236 r = amdgpu_cs_create_syncobj2(device_handle, 0, &syncobj_handle);
237 CU_ASSERT_EQUAL(r, 0);
238
239 // wait on point 5
240 sp1.syncobj_handle = syncobj_handle;
241 sp1.point = 5;
242 r = pthread_create(&wait_thread, NULL, syncobj_wait, &sp1);
243 CU_ASSERT_EQUAL(r, 0);
244
245 // signal on point 10
246 sp2.syncobj_handle = syncobj_handle;
247 sp2.point = 10;
248 r = pthread_create(&signal_thread, NULL, syncobj_signal, &sp2);
249 CU_ASSERT_EQUAL(r, 0);
250
251 r = pthread_join(wait_thread, &tmp);
252 CU_ASSERT_EQUAL(r, 0);
253 CU_ASSERT_EQUAL(tmp, 0);
254
255 r = pthread_join(signal_thread, &tmp);
256 CU_ASSERT_EQUAL(r, 0);
257 CU_ASSERT_EQUAL(tmp, 0);
258
259 //query timeline payload
260 r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
261 &payload, 1);
262 CU_ASSERT_EQUAL(r, 0);
263 CU_ASSERT_EQUAL(payload, 10);
264
265 //signal on point 16
266 sp3.syncobj_handle = syncobj_handle;
267 sp3.point = 16;
268 r = pthread_create(&c_thread, NULL, syncobj_signal, &sp3);
269 CU_ASSERT_EQUAL(r, 0);
270 //CPU wait on point 16
271 wait_point = 16;
272 timeout = 0;
273 clock_gettime(CLOCK_MONOTONIC, &tp);
274 timeout = tp.tv_sec * 1000000000ULL + tp.tv_nsec;
275 timeout += 0x10000000000; //10s
276 r = amdgpu_cs_syncobj_timeline_wait(device_handle, &syncobj_handle,
277 &wait_point, 1, timeout,
278 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
279 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
280 NULL);
281
282 CU_ASSERT_EQUAL(r, 0);
283 r = pthread_join(c_thread, &tmp);
284 CU_ASSERT_EQUAL(r, 0);
285 CU_ASSERT_EQUAL(tmp, 0);
286
287 // export point 16 and import to point 18
288 r = amdgpu_cs_syncobj_export_sync_file2(device_handle, syncobj_handle,
289 16,
290 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
291 &sync_fd);
292 CU_ASSERT_EQUAL(r, 0);
293 r = amdgpu_cs_syncobj_import_sync_file2(device_handle, syncobj_handle,
294 18, sync_fd);
295 CU_ASSERT_EQUAL(r, 0);
296 r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
297 &payload, 1);
298 CU_ASSERT_EQUAL(r, 0);
299 CU_ASSERT_EQUAL(payload, 18);
300
301 // CPU signal on point 20
302 signal_point = 20;
303 r = amdgpu_cs_syncobj_timeline_signal(device_handle, &syncobj_handle,
304 &signal_point, 1);
305 CU_ASSERT_EQUAL(r, 0);
306 r = amdgpu_cs_syncobj_query(device_handle, &syncobj_handle,
307 &payload, 1);
308 CU_ASSERT_EQUAL(r, 0);
309 CU_ASSERT_EQUAL(payload, 20);
310
311 r = amdgpu_cs_destroy_syncobj(device_handle, syncobj_handle);
312 CU_ASSERT_EQUAL(r, 0);
313
314 }
315