/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <sys/ioctl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem);
static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem);
/**
 * Create command submission context
 *
 * \param dev - \c [in] Device handle. See #amdgpu_device_initialize()
 * \param priority - \c [in] Context priority. See AMDGPU_CTX_PRIORITY_*
 * \param context - \c [out] GPU Context handle
 *
 * \return 0 on success otherwise POSIX Error code
 */
drm_public int amdgpu_cs_ctx_create2(amdgpu_device_handle dev,
				     uint32_t priority,
				     amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (!dev || !context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (!gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL);
	if (r)
		goto error;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	args.in.priority = priority;

	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++)
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++)
				list_inithead(&gpu_context->sem_list[i][j][k]);
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	pthread_mutex_destroy(&gpu_context->sequence_mutex);
	free(gpu_context);
	return r;
}

drm_public int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
				    amdgpu_context_handle *context)
{
	return amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_NORMAL, context);
}
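
/*
 * Usage sketch (illustrative, not part of the library): typical context
 * lifetime around a device handle obtained from amdgpu_device_initialize().
 */
#if 0
static int example_run_with_context(amdgpu_device_handle dev)
{
	amdgpu_context_handle ctx;
	int r;

	r = amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_HIGH, &ctx);
	if (r)
		return r;

	/* ... build BO lists and submit IBs against ctx ... */

	return amdgpu_cs_ctx_free(ctx);
}
#endif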

/**
 * Release command submission context
 *
 * \param context - \c [in] amdgpu context handle
 *
 * \return 0 on success otherwise POSIX Error code
 */
drm_public int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int i, j, k;
	int r;

	if (!context)
		return -EINVAL;

	pthread_mutex_destroy(&context->sequence_mutex);

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		for (j = 0; j < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; j++) {
			for (k = 0; k < AMDGPU_CS_MAX_RINGS; k++) {
				amdgpu_semaphore_handle sem;
				LIST_FOR_EACH_ENTRY(sem, &context->sem_list[i][j][k], list) {
					list_del(&sem->list);
					amdgpu_cs_reset_sem(sem);
					amdgpu_cs_unreference_sem(sem);
				}
			}
		}
	}
	free(context);

	return r;
}

drm_public int amdgpu_cs_ctx_override_priority(amdgpu_device_handle dev,
					       amdgpu_context_handle context,
					       int master_fd,
					       unsigned priority)
{
	union drm_amdgpu_sched args;
	int r;

	if (!dev || !context || master_fd < 0)
		return -EINVAL;

	memset(&args, 0, sizeof(args));

	args.in.op = AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE;
	args.in.fd = dev->fd;
	args.in.priority = priority;
	args.in.ctx_id = context->id;

	r = drmCommandWrite(master_fd, DRM_AMDGPU_SCHED, &args, sizeof(args));
	if (r)
		return r;

	return 0;
}

drm_public int amdgpu_cs_ctx_stable_pstate(amdgpu_context_handle context,
					   uint32_t op,
					   uint32_t flags,
					   uint32_t *out_flags)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = op;
	args.in.ctx_id = context->id;
	args.in.flags = flags;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r && out_flags)
		*out_flags = args.out.pstate.flags;
	return r;
}
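
/*
 * Usage sketch (illustrative, not part of the library): pinning the peak
 * stable power state around a profiling run and restoring the previous
 * state afterwards. The op/flag values are the AMDGPU_CTX_OP_*_STABLE_PSTATE
 * and AMDGPU_CTX_STABLE_PSTATE_* constants from amdgpu_drm.h.
 */
#if 0
static int example_profile_at_peak(amdgpu_context_handle ctx)
{
	uint32_t saved;
	int r;

	r = amdgpu_cs_ctx_stable_pstate(ctx, AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &saved);
	if (r)
		return r;

	r = amdgpu_cs_ctx_stable_pstate(ctx, AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	if (r)
		return r;

	/* ... run the workload being profiled ... */

	return amdgpu_cs_ctx_stable_pstate(ctx, AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					   saved, NULL);
}
#endif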

drm_public int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
					   uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}

drm_public int amdgpu_cs_query_reset_state2(amdgpu_context_handle context,
					    uint64_t *flags)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r)
		*flags = args.out.state.flags;
	return r;
}
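
/*
 * Usage sketch (illustrative, not part of the library): deciding whether a
 * context must be recreated after a GPU reset. The AMDGPU_CTX_QUERY2_FLAGS_*
 * bits come from amdgpu_drm.h.
 */
#if 0
static bool example_context_lost(amdgpu_context_handle ctx)
{
	uint64_t flags = 0;

	if (amdgpu_cs_query_reset_state2(ctx, &flags))
		return false;

	/* GUILTY: this context caused the hang; RESET: a reset happened. */
	return !!(flags & (AMDGPU_CTX_QUERY2_FLAGS_GUILTY |
			   AMDGPU_CTX_QUERY2_FLAGS_RESET));
}
#endif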

/**
 * Submit command to kernel DRM
 *
 * \param context - \c [in] GPU Context
 * \param ibs_request - \c [in] Pointer to submission request; on success
 *        its seq_no field receives the kernel fence sequence number
 *
 * \return 0 on success otherwise POSIX Error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
	amdgpu_device_handle dev = context->dev;
	struct list_head *sem_list;
	amdgpu_semaphore_handle sem, tmp;
	uint32_t i, size, num_chunks, bo_list_handle = 0, sem_count = 0;
	uint64_t seq_no;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs == 0) {
		ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
		return 0;
	}
	user_fence = (ibs_request->fence_info.handle != NULL);

	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	if (ibs_request->resources)
		bo_list_handle = ibs_request->resources->handle;
	num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	pthread_mutex_lock(&context->sequence_mutex);

	if (user_fence) {
		i = num_chunks++;

		/* fence chunk */
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = alloca(sizeof(struct drm_amdgpu_cs_chunk_dep) *
				      ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = num_chunks++;

		/* dependencies chunk */
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	sem_list = &context->sem_list[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring];
	LIST_FOR_EACH_ENTRY(sem, sem_list, list)
		sem_count++;
	if (sem_count) {
		sem_dependencies = alloca(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_count);
		if (!sem_dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}
		sem_count = 0;
		LIST_FOR_EACH_ENTRY_SAFE(sem, tmp, sem_list, list) {
			struct amdgpu_cs_fence *info = &sem->signal_fence;
			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;

			list_del(&sem->list);
			amdgpu_cs_reset_sem(sem);
			amdgpu_cs_unreference_sem(sem);
		}
		i = num_chunks++;

		/* dependencies chunk */
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
	}

	r = amdgpu_cs_submit_raw2(dev, context, bo_list_handle, num_chunks,
				  chunks, &seq_no);
	if (r)
		goto error_unlock;

	ibs_request->seq_no = seq_no;
	context->last_seq[ibs_request->ip_type][ibs_request->ip_instance][ibs_request->ring] = ibs_request->seq_no;
error_unlock:
	pthread_mutex_unlock(&context->sequence_mutex);
	return r;
}

drm_public int amdgpu_cs_submit(amdgpu_context_handle context,
				uint64_t flags,
				struct amdgpu_cs_request *ibs_request,
				uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (!context || !ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}
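
/*
 * Usage sketch (illustrative, not part of the library): submitting a single
 * IB on the first GFX ring. The IB's GPU virtual address `ib_va`, its size
 * in dwords `ib_dw` and the BO list are assumed to have been prepared with
 * the usual amdgpu_bo_* / amdgpu_bo_list_create() calls.
 */
#if 0
static int example_submit_gfx_ib(amdgpu_context_handle ctx,
				 amdgpu_bo_list_handle bo_list,
				 uint64_t ib_va, uint32_t ib_dw,
				 uint64_t *seq_no)
{
	struct amdgpu_cs_ib_info ib = {
		.ib_mc_address = ib_va,
		.size = ib_dw,
	};
	struct amdgpu_cs_request req = {
		.ip_type = AMDGPU_HW_IP_GFX,
		.ring = 0,
		.resources = bo_list,
		.number_of_ibs = 1,
		.ibs = &ib,
	};
	int r = amdgpu_cs_submit(ctx, 0, &req, 1);

	if (!r)
		*seq_no = req.seq_no;
	return r;
}
#endif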

/**
 * Calculate absolute timeout.
 *
 * \param timeout - \c [in] timeout in nanoseconds.
 *
 * \return absolute timeout in nanoseconds
 */
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n",
				errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		/* overflow: treat as an infinite wait */
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

drm_public int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
					    uint64_t timeout_ns,
					    uint64_t flags,
					    uint32_t *expired)
{
	bool busy = true;
	int r;

	if (!fence || !expired || !fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (fence->fence == AMDGPU_NULL_SUBMIT_SEQ) {
		*expired = true;
		return 0;
	}

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}
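
/*
 * Usage sketch (illustrative, not part of the library): blocking until the
 * submission from the sketch above has retired, with an infinite timeout.
 */
#if 0
static int example_wait_gfx_submission(amdgpu_context_handle ctx,
				       uint64_t seq_no)
{
	struct amdgpu_cs_fence fence = {
		.context = ctx,
		.ip_type = AMDGPU_HW_IP_GFX,
		.ring = 0,
		.fence = seq_no,
	};
	uint32_t expired = 0;
	int r = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE,
					     0, &expired);

	return r ? r : (expired ? 0 : -ETIME);
}
#endif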

static int amdgpu_ioctl_wait_fences(struct amdgpu_cs_fence *fences,
				    uint32_t fence_count,
				    bool wait_all,
				    uint64_t timeout_ns,
				    uint32_t *status,
				    uint32_t *first)
{
	struct drm_amdgpu_fence *drm_fences;
	amdgpu_device_handle dev = fences[0].context->dev;
	union drm_amdgpu_wait_fences args;
	int r;
	uint32_t i;

	drm_fences = alloca(sizeof(struct drm_amdgpu_fence) * fence_count);
	for (i = 0; i < fence_count; i++) {
		drm_fences[i].ctx_id = fences[i].context->id;
		drm_fences[i].ip_type = fences[i].ip_type;
		drm_fences[i].ip_instance = fences[i].ip_instance;
		drm_fences[i].ring = fences[i].ring;
		drm_fences[i].seq_no = fences[i].fence;
	}

	memset(&args, 0, sizeof(args));
	args.in.fences = (uint64_t)(uintptr_t)drm_fences;
	args.in.fence_count = fence_count;
	args.in.wait_all = wait_all;
	args.in.timeout_ns = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_FENCES, &args);
	if (r)
		return -errno;

	*status = args.out.status;

	if (first)
		*first = args.out.first_signaled;

	return 0;
}

drm_public int amdgpu_cs_wait_fences(struct amdgpu_cs_fence *fences,
				     uint32_t fence_count,
				     bool wait_all,
				     uint64_t timeout_ns,
				     uint32_t *status,
				     uint32_t *first)
{
	uint32_t i;

	/* Sanity check */
	if (!fences || !status || !fence_count)
		return -EINVAL;

	for (i = 0; i < fence_count; i++) {
		if (NULL == fences[i].context)
			return -EINVAL;
		if (fences[i].ip_type >= AMDGPU_HW_IP_NUM)
			return -EINVAL;
		if (fences[i].ring >= AMDGPU_CS_MAX_RINGS)
			return -EINVAL;
	}

	*status = 0;

	return amdgpu_ioctl_wait_fences(fences, fence_count, wait_all,
					timeout_ns, status, first);
}
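
/*
 * Usage sketch (illustrative, not part of the library): waiting up to one
 * second (the timeout here is relative) for any one of several fences and
 * reporting which signaled first.
 */
#if 0
static int example_wait_any_fence(struct amdgpu_cs_fence *fences,
				  uint32_t count, uint32_t *first)
{
	uint32_t status = 0;
	int r = amdgpu_cs_wait_fences(fences, count, false /* wait any */,
				      1000000000ull, &status, first);

	if (r)
		return r;
	return status ? 0 : -ETIME;
}
#endif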

drm_public int amdgpu_cs_create_semaphore(amdgpu_semaphore_handle *sem)
{
	struct amdgpu_semaphore *gpu_semaphore;

	if (!sem)
		return -EINVAL;

	gpu_semaphore = calloc(1, sizeof(struct amdgpu_semaphore));
	if (!gpu_semaphore)
		return -ENOMEM;

	atomic_set(&gpu_semaphore->refcount, 1);
	*sem = gpu_semaphore;

	return 0;
}

drm_public int amdgpu_cs_signal_semaphore(amdgpu_context_handle ctx,
					  uint32_t ip_type,
					  uint32_t ip_instance,
					  uint32_t ring,
					  amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* sem has been signaled */
	if (sem->signal_fence.context)
		return -EINVAL;
	pthread_mutex_lock(&ctx->sequence_mutex);
	sem->signal_fence.context = ctx;
	sem->signal_fence.ip_type = ip_type;
	sem->signal_fence.ip_instance = ip_instance;
	sem->signal_fence.ring = ring;
	sem->signal_fence.fence = ctx->last_seq[ip_type][ip_instance][ring];
	update_references(NULL, &sem->refcount);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}

drm_public int amdgpu_cs_wait_semaphore(amdgpu_context_handle ctx,
					uint32_t ip_type,
					uint32_t ip_instance,
					uint32_t ring,
					amdgpu_semaphore_handle sem)
{
	if (!ctx || !sem)
		return -EINVAL;
	if (ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	/* must signal first */
	if (!sem->signal_fence.context)
		return -EINVAL;

	pthread_mutex_lock(&ctx->sequence_mutex);
	list_add(&sem->list, &ctx->sem_list[ip_type][ip_instance][ring]);
	pthread_mutex_unlock(&ctx->sequence_mutex);
	return 0;
}
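
/*
 * Usage sketch (illustrative, not part of the library): ordering the next
 * compute submission after the most recent GFX submission on the same
 * context. Signaling captures the GFX ring's last sequence number; the next
 * submission on the waiting ring then carries it as a dependency and drops
 * the list reference, so dropping our own reference right away is safe.
 */
#if 0
static int example_gfx_to_compute_barrier(amdgpu_context_handle ctx)
{
	amdgpu_semaphore_handle sem;
	int r;

	r = amdgpu_cs_create_semaphore(&sem);
	if (r)
		return r;

	r = amdgpu_cs_signal_semaphore(ctx, AMDGPU_HW_IP_GFX, 0, 0, sem);
	if (!r)
		r = amdgpu_cs_wait_semaphore(ctx, AMDGPU_HW_IP_COMPUTE, 0, 0,
					     sem);

	amdgpu_cs_destroy_semaphore(sem);
	return r;
}
#endif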

static int amdgpu_cs_reset_sem(amdgpu_semaphore_handle sem)
{
	if (!sem || !sem->signal_fence.context)
		return -EINVAL;

	sem->signal_fence.context = NULL;
	sem->signal_fence.ip_type = 0;
	sem->signal_fence.ip_instance = 0;
	sem->signal_fence.ring = 0;
	sem->signal_fence.fence = 0;

	return 0;
}

static int amdgpu_cs_unreference_sem(amdgpu_semaphore_handle sem)
{
	if (!sem)
		return -EINVAL;

	if (update_references(&sem->refcount, NULL))
		free(sem);
	return 0;
}

drm_public int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
{
	return amdgpu_cs_unreference_sem(sem);
}

drm_public int amdgpu_cs_create_syncobj2(amdgpu_device_handle dev,
					 uint32_t flags,
					 uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, flags, handle);
}

drm_public int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
					uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjCreate(dev->fd, 0, handle);
}

drm_public int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
					 uint32_t handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjDestroy(dev->fd, handle);
}

drm_public int amdgpu_cs_syncobj_reset(amdgpu_device_handle dev,
				       const uint32_t *syncobjs,
				       uint32_t syncobj_count)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjReset(dev->fd, syncobjs, syncobj_count);
}

drm_public int amdgpu_cs_syncobj_signal(amdgpu_device_handle dev,
					const uint32_t *syncobjs,
					uint32_t syncobj_count)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjSignal(dev->fd, syncobjs, syncobj_count);
}

drm_public int amdgpu_cs_syncobj_timeline_signal(amdgpu_device_handle dev,
						 const uint32_t *syncobjs,
						 uint64_t *points,
						 uint32_t syncobj_count)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjTimelineSignal(dev->fd, syncobjs,
					points, syncobj_count);
}

drm_public int amdgpu_cs_syncobj_wait(amdgpu_device_handle dev,
				      uint32_t *handles, unsigned num_handles,
				      int64_t timeout_nsec, unsigned flags,
				      uint32_t *first_signaled)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjWait(dev->fd, handles, num_handles, timeout_nsec,
			      flags, first_signaled);
}
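
/*
 * Usage sketch (illustrative, not part of the library): waiting for all of a
 * set of syncobjs, tolerating handles whose fences have not been attached
 * yet. The DRM_SYNCOBJ_WAIT_FLAGS_* values come from xf86drm.h; note the
 * timeout is an absolute CLOCK_MONOTONIC value.
 */
#if 0
static int example_wait_all_syncobjs(amdgpu_device_handle dev,
				     uint32_t *handles, unsigned count,
				     int64_t abs_timeout_nsec)
{
	return amdgpu_cs_syncobj_wait(dev, handles, count, abs_timeout_nsec,
				      DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
				      DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
				      NULL);
}
#endif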

drm_public int amdgpu_cs_syncobj_timeline_wait(amdgpu_device_handle dev,
					       uint32_t *handles, uint64_t *points,
					       unsigned num_handles,
					       int64_t timeout_nsec, unsigned flags,
					       uint32_t *first_signaled)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjTimelineWait(dev->fd, handles, points, num_handles,
				      timeout_nsec, flags, first_signaled);
}

drm_public int amdgpu_cs_syncobj_query(amdgpu_device_handle dev,
				       uint32_t *handles, uint64_t *points,
				       unsigned num_handles)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjQuery(dev->fd, handles, points, num_handles);
}

drm_public int amdgpu_cs_syncobj_query2(amdgpu_device_handle dev,
					uint32_t *handles, uint64_t *points,
					unsigned num_handles, uint32_t flags)
{
	if (!dev)
		return -EINVAL;

	return drmSyncobjQuery2(dev->fd, handles, points, num_handles, flags);
}

drm_public int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
					uint32_t handle,
					int *shared_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
}

drm_public int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
					int shared_fd,
					uint32_t *handle)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
}

drm_public int amdgpu_cs_syncobj_export_sync_file(amdgpu_device_handle dev,
						  uint32_t syncobj,
						  int *sync_file_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);
}

drm_public int amdgpu_cs_syncobj_import_sync_file(amdgpu_device_handle dev,
						  uint32_t syncobj,
						  int sync_file_fd)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);
}
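
/*
 * Usage sketch (illustrative, not part of the library): copying a fence from
 * one syncobj to another through a sync_file fd, as one would when handing a
 * fence to an API that only speaks sync_files. close() needs <unistd.h>.
 */
#if 0
static int example_copy_fence_via_sync_file(amdgpu_device_handle dev,
					    uint32_t src, uint32_t dst)
{
	int fd, r;

	r = amdgpu_cs_syncobj_export_sync_file(dev, src, &fd);
	if (r)
		return r;

	r = amdgpu_cs_syncobj_import_sync_file(dev, dst, fd);
	close(fd);
	return r;
}
#endif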

drm_public int amdgpu_cs_syncobj_export_sync_file2(amdgpu_device_handle dev,
						   uint32_t syncobj,
						   uint64_t point,
						   uint32_t flags,
						   int *sync_file_fd)
{
	uint32_t binary_handle;
	int ret;

	if (NULL == dev)
		return -EINVAL;

	if (!point)
		return drmSyncobjExportSyncFile(dev->fd, syncobj, sync_file_fd);

	ret = drmSyncobjCreate(dev->fd, 0, &binary_handle);
	if (ret)
		return ret;

	ret = drmSyncobjTransfer(dev->fd, binary_handle, 0,
				 syncobj, point, flags);
	if (ret)
		goto out;
	ret = drmSyncobjExportSyncFile(dev->fd, binary_handle, sync_file_fd);
out:
	drmSyncobjDestroy(dev->fd, binary_handle);
	return ret;
}

drm_public int amdgpu_cs_syncobj_import_sync_file2(amdgpu_device_handle dev,
						   uint32_t syncobj,
						   uint64_t point,
						   int sync_file_fd)
{
	uint32_t binary_handle;
	int ret;

	if (NULL == dev)
		return -EINVAL;

	if (!point)
		return drmSyncobjImportSyncFile(dev->fd, syncobj, sync_file_fd);

	ret = drmSyncobjCreate(dev->fd, 0, &binary_handle);
	if (ret)
		return ret;
	ret = drmSyncobjImportSyncFile(dev->fd, binary_handle, sync_file_fd);
	if (ret)
		goto out;
	ret = drmSyncobjTransfer(dev->fd, syncobj, point,
				 binary_handle, 0, 0);
out:
	drmSyncobjDestroy(dev->fd, binary_handle);
	return ret;
}

drm_public int amdgpu_cs_syncobj_transfer(amdgpu_device_handle dev,
					  uint32_t dst_handle,
					  uint64_t dst_point,
					  uint32_t src_handle,
					  uint64_t src_point,
					  uint32_t flags)
{
	if (NULL == dev)
		return -EINVAL;

	return drmSyncobjTransfer(dev->fd,
				  dst_handle, dst_point,
				  src_handle, src_point,
				  flags);
}
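
/*
 * Usage sketch (illustrative, not part of the library): materializing a
 * single timeline point as a plain binary syncobj, mirroring what
 * amdgpu_cs_syncobj_export_sync_file2() does internally before exporting.
 */
#if 0
static int example_timeline_point_to_binary(amdgpu_device_handle dev,
					    uint32_t timeline, uint64_t point,
					    uint32_t *binary)
{
	int r = amdgpu_cs_create_syncobj(dev, binary);

	if (r)
		return r;

	r = amdgpu_cs_syncobj_transfer(dev, *binary, 0, timeline, point, 0);
	if (r)
		amdgpu_cs_destroy_syncobj(dev, *binary);
	return r;
}
#endif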

drm_public int amdgpu_cs_submit_raw(amdgpu_device_handle dev,
				    amdgpu_context_handle context,
				    amdgpu_bo_list_handle bo_list_handle,
				    int num_chunks,
				    struct drm_amdgpu_cs_chunk *chunks,
				    uint64_t *seq_no)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	int i, r;

	if (num_chunks == 0)
		return -EINVAL;

	memset(&cs, 0, sizeof(cs));
	chunk_array = alloca(sizeof(uint64_t) * num_chunks);
	for (i = 0; i < num_chunks; i++)
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	cs.in.bo_list_handle = bo_list_handle ? bo_list_handle->handle : 0;
	cs.in.num_chunks = num_chunks;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		return r;

	if (seq_no)
		*seq_no = cs.out.handle;
	return 0;
}

drm_public int amdgpu_cs_submit_raw2(amdgpu_device_handle dev,
				     amdgpu_context_handle context,
				     uint32_t bo_list_handle,
				     int num_chunks,
				     struct drm_amdgpu_cs_chunk *chunks,
				     uint64_t *seq_no)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	int i, r;

	memset(&cs, 0, sizeof(cs));
	chunk_array = alloca(sizeof(uint64_t) * num_chunks);
	for (i = 0; i < num_chunks; i++)
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	cs.in.bo_list_handle = bo_list_handle;
	cs.in.num_chunks = num_chunks;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (!r && seq_no)
		*seq_no = cs.out.handle;
	return r;
}
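
/*
 * Usage sketch (illustrative, not part of the library): driving
 * amdgpu_cs_submit_raw2() with a hand-built IB chunk, which is essentially
 * what amdgpu_cs_submit_one() assembles above.
 */
#if 0
static int example_submit_raw_ib(amdgpu_device_handle dev,
				 amdgpu_context_handle ctx,
				 uint32_t bo_list_handle,
				 uint64_t ib_va, uint32_t ib_dw,
				 uint64_t *seq_no)
{
	struct drm_amdgpu_cs_chunk_data data = {
		.ib_data = {
			.va_start = ib_va,
			.ib_bytes = ib_dw * 4,
			.ip_type = AMDGPU_HW_IP_GFX,
		},
	};
	struct drm_amdgpu_cs_chunk chunk = {
		.chunk_id = AMDGPU_CHUNK_ID_IB,
		.length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4,
		.chunk_data = (uint64_t)(uintptr_t)&data,
	};

	return amdgpu_cs_submit_raw2(dev, ctx, bo_list_handle, 1, &chunk,
				     seq_no);
}
#endif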

drm_public void amdgpu_cs_chunk_fence_info_to_data(struct amdgpu_cs_fence_info *fence_info,
						   struct drm_amdgpu_cs_chunk_data *data)
{
	data->fence_data.handle = fence_info->handle->handle;
	data->fence_data.offset = fence_info->offset * sizeof(uint64_t);
}

drm_public void amdgpu_cs_chunk_fence_to_dep(struct amdgpu_cs_fence *fence,
					     struct drm_amdgpu_cs_chunk_dep *dep)
{
	dep->ip_type = fence->ip_type;
	dep->ip_instance = fence->ip_instance;
	dep->ring = fence->ring;
	dep->ctx_id = fence->context->id;
	dep->handle = fence->fence;
}

drm_public int amdgpu_cs_fence_to_handle(amdgpu_device_handle dev,
					 struct amdgpu_cs_fence *fence,
					 uint32_t what,
					 uint32_t *out_handle)
{
	union drm_amdgpu_fence_to_handle fth;
	int r;

	memset(&fth, 0, sizeof(fth));
	fth.in.fence.ctx_id = fence->context->id;
	fth.in.fence.ip_type = fence->ip_type;
	fth.in.fence.ip_instance = fence->ip_instance;
	fth.in.fence.ring = fence->ring;
	fth.in.fence.seq_no = fence->fence;
	fth.in.what = what;

	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_FENCE_TO_HANDLE,
				&fth, sizeof(fth));
	if (r == 0)
		*out_handle = fth.out.handle;
	return r;
}
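
/*
 * Usage sketch (illustrative, not part of the library): converting a command
 * submission fence into a syncobj that other DRM clients can wait on. The
 * AMDGPU_FENCE_TO_HANDLE_GET_* selectors come from amdgpu_drm.h.
 */
#if 0
static int example_fence_to_syncobj(amdgpu_device_handle dev,
				    struct amdgpu_cs_fence *fence,
				    uint32_t *syncobj)
{
	return amdgpu_cs_fence_to_handle(dev, fence,
					 AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ,
					 syncobj);
}
#endif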