// SPDX-License-Identifier: GPL-2.0-only
/*
 * Fence mechanism for dma-buf and to allow for asynchronous dma access
 *
 * Copyright (C) 2012 Canonical Ltd
 * Copyright (C) 2012 Texas Instruments
 *
 * Authors:
 * Rob Clark <robdclark@gmail.com>
 * Maarten Lankhorst <maarten.lankhorst@canonical.com>
 */

#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/dma-fence.h>
#include <linux/sched/signal.h>

#define CREATE_TRACE_POINTS
#include <trace/events/dma_fence.h>

EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);

static DEFINE_SPINLOCK(dma_fence_stub_lock);
static struct dma_fence dma_fence_stub;

/*
 * fence context counter: each execution context should have its own
 * fence context; this allows checking if fences belong to the same
 * context or not. One device can have multiple separate contexts,
 * and they're used if some engine can run independently of another.
 */
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);

/**
 * DOC: DMA fences overview
 *
 * DMA fences, represented by &struct dma_fence, are the kernel internal
 * synchronization primitive for DMA operations like GPU rendering, video
 * encoding/decoding, or displaying buffers on a screen.
 *
 * A fence is initialized using dma_fence_init() and completed using
 * dma_fence_signal(). Fences are associated with a context, allocated through
 * dma_fence_context_alloc(), and all fences on the same context are
 * fully ordered.
 *
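 * A minimal lifecycle sketch (assuming a hypothetical driver-provided
 * example_ops and a driver job structure that embeds the fence and an
 * irqsafe spinlock)::
 *
 *     u64 ctx = dma_fence_context_alloc(1);
 *
 *     dma_fence_init(&job->fence, &example_ops, &job->lock, ctx, 1);
 *     dma_fence_signal(&job->fence);
 *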
 * Since the purpose of fences is to facilitate cross-device and
 * cross-application synchronization, there are multiple ways to use one:
 *
 * - Individual fences can be exposed as a &sync_file, accessed as a file
 *   descriptor from userspace, created by calling sync_file_create(). This is
 *   called explicit fencing, since userspace passes around explicit
 *   synchronization points.
 *
 * - Some subsystems also have their own explicit fencing primitives, like
 *   &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying
 *   fence to be updated.
 *
 * - Then there's also implicit fencing, where the synchronization points are
 *   implicitly passed around as part of shared &dma_buf instances. Such
 *   implicit fences are stored in &struct dma_resv through the
 *   &dma_buf.resv pointer.
 */

/**
 * DOC: fence cross-driver contract
 *
 * Since &dma_fence provides a cross-driver contract, all drivers must follow
 * the same rules:
 *
 * * Fences must complete in a reasonable time. Fences which represent kernels
 *   and shaders submitted by userspace, which could run forever, must be backed
 *   up by timeout and gpu hang recovery code. Minimally that code must prevent
 *   further command submission and force complete all in-flight fences, e.g.
 *   when the driver or hardware do not support gpu reset, or if the gpu reset
 *   failed for some reason. Ideally the driver supports gpu recovery which only
 *   affects the offending userspace context, and no other userspace
 *   submissions.
 *
 * * Drivers may have different ideas of what completion within a reasonable
 *   time means. Some hang recovery code uses a fixed timeout, others a mix
 *   between observing forward progress and increasingly strict timeouts.
 *   Drivers should not try to second-guess timeout handling of fences from
 *   other drivers.
 *
 * * To ensure there are no deadlocks of dma_fence_wait() against other locks,
 *   drivers should annotate all code required to reach dma_fence_signal(),
 *   which completes the fences, with dma_fence_begin_signalling() and
 *   dma_fence_end_signalling().
 *
 * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock().
 *   This means any code required for fence completion cannot acquire a
 *   &dma_resv lock. Note that this also pulls in the entire established
 *   locking hierarchy around dma_resv_lock() and dma_resv_unlock().
 *
 * * Drivers are allowed to call dma_fence_wait() from their &shrinker
 *   callbacks. This means any code required for fence completion cannot
 *   allocate memory with GFP_KERNEL.
 *
 * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier
 *   respectively &mmu_interval_notifier callbacks. This means any code required
 *   for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO.
 *   Only GFP_ATOMIC is permissible, which might fail. See the sketch at the
 *   end of this section.
 *
 * Note that only GPU drivers have a reasonable excuse for both requiring
 * &mmu_interval_notifier and &shrinker callbacks at the same time as having to
 * track asynchronous compute work using &dma_fence. No driver outside of
 * drivers/gpu should ever call dma_fence_wait() in such contexts.
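 *
 * As a sketch of the allocation rules above, a completion path that runs in
 * such a context must not block in the allocator (example_job and
 * example_queue_event() are hypothetical driver helpers)::
 *
 *     static void example_job_done(struct example_job *job)
 *     {
 *             struct example_event *ev;
 *
 *             ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
 *             if (ev)
 *                     example_queue_event(job, ev);
 *
 *             dma_fence_signal(&job->fence);
 *     }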
 */

static const char *dma_fence_stub_get_name(struct dma_fence *fence)
{
        return "stub";
}

static const struct dma_fence_ops dma_fence_stub_ops = {
        .get_driver_name = dma_fence_stub_get_name,
        .get_timeline_name = dma_fence_stub_get_name,
};

/**
 * dma_fence_get_stub - return a signaled fence
 *
 * Return a stub fence which is already signaled.
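 *
 * This is useful as a placeholder wherever an API expects a fence for work
 * that has already completed, e.g. (sketch, with a hypothetical
 * example_attach_fence() helper)::
 *
 *     struct dma_fence *fence = dma_fence_get_stub();
 *
 *     example_attach_fence(buf, fence);
 *     dma_fence_put(fence);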
 */
struct dma_fence *dma_fence_get_stub(void)
{
        spin_lock(&dma_fence_stub_lock);
        if (!dma_fence_stub.ops) {
                dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops,
                               &dma_fence_stub_lock, 0, 0);
                dma_fence_signal_locked(&dma_fence_stub);
        }
        spin_unlock(&dma_fence_stub_lock);

        return dma_fence_get(&dma_fence_stub);
}
EXPORT_SYMBOL(dma_fence_get_stub);

/**
 * dma_fence_context_alloc - allocate an array of fence contexts
 * @num: amount of contexts to allocate
 *
 * This function will return the first index of the number of fence contexts
 * allocated. The fence context is used for setting &dma_fence.context to a
 * unique number by passing the context to dma_fence_init().
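 *
 * A sketch for a device with two engines that can run independently of each
 * other (the gpu/ring fields are hypothetical driver state)::
 *
 *     u64 base = dma_fence_context_alloc(2);
 *
 *     gpu->gfx_ring.fence_context = base;
 *     gpu->copy_ring.fence_context = base + 1;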
 */
u64 dma_fence_context_alloc(unsigned num)
{
        WARN_ON(!num);
        return atomic64_fetch_add(num, &dma_fence_context_counter);
}
EXPORT_SYMBOL(dma_fence_context_alloc);

/**
 * DOC: fence signalling annotation
 *
 * Proving correctness of all the kernel code around &dma_fence through code
 * review and testing is tricky for a few reasons:
 *
 * * It is a cross-driver contract, and therefore all drivers must follow the
 *   same rules for lock nesting order, calling contexts for various functions
 *   and anything else significant for in-kernel interfaces. But it is also
 *   impossible to test all drivers in a single machine, hence brute-force N vs.
 *   N testing of all combinations is impossible. Even just limiting to the
 *   possible combinations is infeasible.
 *
 * * There is an enormous amount of driver code involved. For render drivers
 *   there's the tail of command submission, after fences are published,
 *   scheduler code, interrupt and workers to process job completion,
 *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
 *   with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
 *   and &shrinker. For modesetting drivers there's the commit tail functions
 *   between when fences for an atomic modeset are published, and when the
 *   corresponding vblank completes, including any interrupt processing and
 *   related workers. Auditing all that code, across all drivers, is not
 *   feasible.
 *
 * * Due to how many other subsystems are involved and the locking hierarchies
 *   this pulls in there is extremely thin wiggle-room for driver-specific
 *   differences. &dma_fence interacts with almost all of the core memory
 *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
 *   dma_resv_unlock(). On the other side it also interacts through all
 *   allocation sites through &mmu_notifier and &shrinker.
 *
 * Furthermore lockdep does not handle cross-release dependencies, which means
 * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught
 * at runtime with some quick testing. The simplest example is one thread
 * waiting on a &dma_fence while holding a lock::
 *
 *     lock(A);
 *     dma_fence_wait(B);
 *     unlock(A);
 *
 * while the other thread is stuck trying to acquire the same lock, which
 * prevents it from signalling the fence the previous thread is stuck waiting
 * on::
 *
 *     lock(A);
 *     unlock(A);
 *     dma_fence_signal(B);
 *
 * By manually annotating all code relevant to signalling a &dma_fence we can
 * teach lockdep about these dependencies, which also helps with the validation
 * headache since now lockdep can check all the rules for us::
 *
 *     cookie = dma_fence_begin_signalling();
 *     lock(A);
 *     unlock(A);
 *     dma_fence_signal(B);
 *     dma_fence_end_signalling(cookie);
 *
 * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to
 * annotate critical sections the following rules need to be observed:
 *
 * * All code necessary to complete a &dma_fence must be annotated, from the
 *   point where a fence is accessible to other threads, to the point where
 *   dma_fence_signal() is called. Un-annotated code can contain deadlock issues,
 *   and due to the very strict rules and many corner cases it is infeasible to
 *   catch these just with review or normal stress testing.
 *
 * * &struct dma_resv deserves a special note, since the readers are only
 *   protected by rcu. This means the signalling critical section starts as soon
 *   as the new fences are installed, even before dma_resv_unlock() is called.
 *
 * * The only exception are fast paths and opportunistic signalling code, which
 *   calls dma_fence_signal() purely as an optimization, but is not required to
 *   guarantee completion of a &dma_fence. The usual example is a wait IOCTL
 *   which calls dma_fence_signal(), while the mandatory completion path goes
 *   through a hardware interrupt and possible job completion worker.
 *
 * * To aid composability of code, the annotations can be freely nested, as long
 *   as the overall locking hierarchy is consistent. The annotations also work
 *   both in interrupt and process context. Due to implementation details this
 *   requires that callers pass an opaque cookie from
 *   dma_fence_begin_signalling() to dma_fence_end_signalling().
 *
 * * Validation against the cross driver contract is implemented by priming
 *   lockdep with the relevant hierarchy at boot-up. This means even just
 *   testing with a single device is enough to validate a driver, at least as
 *   far as deadlocks with dma_fence_wait() against dma_fence_signal() are
 *   concerned.
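 *
 * Putting the above together, a sketch of an annotated completion path
 * (example_device and its active_fence field are hypothetical driver state)::
 *
 *     static irqreturn_t example_irq_handler(int irq, void *data)
 *     {
 *             struct example_device *edev = data;
 *             bool cookie = dma_fence_begin_signalling();
 *
 *             dma_fence_signal(edev->active_fence);
 *             dma_fence_end_signalling(cookie);
 *
 *             return IRQ_HANDLED;
 *     }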
 */
#ifdef CONFIG_LOCKDEP
static struct lockdep_map dma_fence_lockdep_map = {
        .name = "dma_fence_map"
};

/**
 * dma_fence_begin_signalling - begin a critical DMA fence signalling section
 *
 * Drivers should use this to annotate the beginning of any code section
 * required to eventually complete &dma_fence by calling dma_fence_signal().
 *
 * The end of these critical sections is annotated with
 * dma_fence_end_signalling().
 *
 * Returns:
 *
 * Opaque cookie needed by the implementation, which needs to be passed to
 * dma_fence_end_signalling().
 */
bool dma_fence_begin_signalling(void)
{
        /* explicitly nesting ... */
        if (lock_is_held_type(&dma_fence_lockdep_map, 1))
                return true;

        /* rely on might_sleep check for soft/hardirq locks */
        if (in_atomic())
                return true;

        /* ... and non-recursive readlock */
        lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);

        return false;
}
EXPORT_SYMBOL(dma_fence_begin_signalling);

280
281 /**
282 * dma_fence_end_signalling - end a critical DMA fence signalling section
283 * @cookie: opaque cookie from dma_fence_begin_signalling()
284 *
285 * Closes a critical section annotation opened by dma_fence_begin_signalling().
286 */
dma_fence_end_signalling(bool cookie)287 void dma_fence_end_signalling(bool cookie)
288 {
289 if (cookie) {
290 return;
291 }
292
293 lock_release(&dma_fence_lockdep_map, _RET_IP_);
294 }
295 EXPORT_SYMBOL(dma_fence_end_signalling);
296
void _dma_fence_might_wait(void)
{
        bool tmp;

        tmp = lock_is_held_type(&dma_fence_lockdep_map, 1);
        if (tmp)
                lock_release(&dma_fence_lockdep_map, _THIS_IP_);
        lock_map_acquire(&dma_fence_lockdep_map);
        lock_map_release(&dma_fence_lockdep_map);
        if (tmp)
                lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_);
}
#endif

/**
 * dma_fence_signal_timestamp_locked - signal completion of a fence
 * @fence: the fence to signal
 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
 *
 * Signal completion for software callbacks on a fence; this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time. Set the timestamp provided as the fence
 * signal timestamp.
 *
 * Unlike dma_fence_signal_timestamp(), this function must be called with
 * &dma_fence.lock held.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
                                      ktime_t timestamp)
{
        struct dma_fence_cb *cur, *tmp;
        struct list_head cb_list;

        lockdep_assert_held(fence->lock);

        if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                      &fence->flags)))
                return -EINVAL;

        /* Stash the cb_list before replacing it with the timestamp */
        list_replace(&fence->cb_list, &cb_list);

        fence->timestamp = timestamp;
        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
        trace_dma_fence_signaled(fence);

        list_for_each_entry_safe(cur, tmp, &cb_list, node) {
                INIT_LIST_HEAD(&cur->node);
                cur->func(fence, cur);
        }

        return 0;
}
EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);

/**
 * dma_fence_signal_timestamp - signal completion of a fence
 * @fence: the fence to signal
 * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
 *
 * Signal completion for software callbacks on a fence; this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time. Set the timestamp provided as the fence
 * signal timestamp.
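 *
 * For example, to propagate a hardware-reported completion time (sketch,
 * assuming a driver-specific example_hw_ts_to_ktime() conversion helper)::
 *
 *     dma_fence_signal_timestamp(fence, example_hw_ts_to_ktime(hw_ts));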
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
{
        unsigned long flags;
        int ret;

        if (!fence)
                return -EINVAL;

        spin_lock_irqsave(fence->lock, flags);
        ret = dma_fence_signal_timestamp_locked(fence, timestamp);
        spin_unlock_irqrestore(fence->lock, flags);

        return ret;
}
EXPORT_SYMBOL(dma_fence_signal_timestamp);

/**
 * dma_fence_signal_locked - signal completion of a fence
 * @fence: the fence to signal
 *
 * Signal completion for software callbacks on a fence; this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time.
 *
 * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
 * held.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal_locked(struct dma_fence *fence)
{
        return dma_fence_signal_timestamp_locked(fence, ktime_get());
}
EXPORT_SYMBOL(dma_fence_signal_locked);

/**
 * dma_fence_signal - signal completion of a fence
 * @fence: the fence to signal
 *
 * Signal completion for software callbacks on a fence; this will unblock
 * dma_fence_wait() calls and run all the callbacks added with
 * dma_fence_add_callback(). Can be called multiple times, but since a fence
 * can only go from the unsignaled to the signaled state and not back, it will
 * only be effective the first time.
 *
 * Returns 0 on success and a negative error value when @fence has been
 * signalled already.
 */
int dma_fence_signal(struct dma_fence *fence)
{
        unsigned long flags;
        int ret;
        bool tmp;

        if (!fence)
                return -EINVAL;

        tmp = dma_fence_begin_signalling();

        spin_lock_irqsave(fence->lock, flags);
        ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
        spin_unlock_irqrestore(fence->lock, flags);

        dma_fence_end_signalling(tmp);

        return ret;
}
EXPORT_SYMBOL(dma_fence_signal);

/**
 * dma_fence_wait_timeout - sleep until the fence gets signaled
 * or until timeout elapses
 * @fence: the fence to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
 * remaining timeout in jiffies on success. Other error values may be
 * returned on custom implementations.
 *
 * Performs a synchronous wait on this fence. It is assumed the caller
 * directly or indirectly (buf-mgr between reservation and committing)
 * holds a reference to the fence, otherwise the fence might be
 * freed before return, resulting in undefined behavior.
 *
 * See also dma_fence_wait() and dma_fence_wait_any_timeout().
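 *
 * Typical usage maps the return value back onto an error code (sketch)::
 *
 *     signed long ret;
 *
 *     ret = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(100));
 *     if (!ret)
 *             return -ETIMEDOUT;
 *     if (ret < 0)
 *             return ret;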
 */
signed long
dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
{
        signed long ret;

        if (WARN_ON(timeout < 0))
                return -EINVAL;

        might_sleep();

        _dma_fence_might_wait();

        trace_dma_fence_wait_start(fence);
        if (fence->ops->wait)
                ret = fence->ops->wait(fence, intr, timeout);
        else
                ret = dma_fence_default_wait(fence, intr, timeout);
        trace_dma_fence_wait_end(fence);
        return ret;
}
EXPORT_SYMBOL(dma_fence_wait_timeout);

/**
 * dma_fence_release - default release function for fences
 * @kref: &dma_fence.refcount
 *
 * This is the default release function for &dma_fence. Drivers shouldn't call
 * this directly, but instead call dma_fence_put().
 */
void dma_fence_release(struct kref *kref)
{
        struct dma_fence *fence = container_of(kref, struct dma_fence,
                                               refcount);

        trace_dma_fence_destroy(fence);

        if (WARN(!list_empty(&fence->cb_list) &&
                 !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
                 "Fence %s:%s:%llx:%llx released with pending signals!\n",
                 fence->ops->get_driver_name(fence),
                 fence->ops->get_timeline_name(fence),
                 fence->context, fence->seqno)) {
                unsigned long flags;

                /*
                 * Failed to signal before release, likely a refcounting issue.
                 *
                 * This should never happen, but if it does make sure that we
                 * don't leave chains dangling. We set the error flag first
                 * so that the callbacks know this signal is due to an error.
                 */
                spin_lock_irqsave(fence->lock, flags);
                fence->error = -EDEADLK;
                dma_fence_signal_locked(fence);
                spin_unlock_irqrestore(fence->lock, flags);
        }

        if (fence->ops->release)
                fence->ops->release(fence);
        else
                dma_fence_free(fence);
}
EXPORT_SYMBOL(dma_fence_release);

/**
 * dma_fence_free - default release function for &dma_fence.
 * @fence: fence to release
 *
 * This is the default implementation for &dma_fence_ops.release. It calls
 * kfree_rcu() on @fence.
 */
void dma_fence_free(struct dma_fence *fence)
{
        kfree_rcu(fence, rcu);
}
EXPORT_SYMBOL(dma_fence_free);

static bool _dma_fence_enable_signaling(struct dma_fence *fence)
{
        bool was_set;

        lockdep_assert_held(fence->lock);

        was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
                                   &fence->flags);

        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return false;

        if (!was_set && fence->ops->enable_signaling) {
                trace_dma_fence_enable_signal(fence);

                if (!fence->ops->enable_signaling(fence)) {
                        dma_fence_signal_locked(fence);
                        return false;
                }
        }

        return true;
}

/**
 * dma_fence_enable_sw_signaling - enable signaling on fence
 * @fence: the fence to enable
 *
 * This will request for sw signaling to be enabled, to make the fence
 * complete as soon as possible. This calls &dma_fence_ops.enable_signaling
 * internally.
 */
void dma_fence_enable_sw_signaling(struct dma_fence *fence)
{
        unsigned long flags;

        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return;

        spin_lock_irqsave(fence->lock, flags);
        _dma_fence_enable_signaling(fence);
        spin_unlock_irqrestore(fence->lock, flags);
}
EXPORT_SYMBOL(dma_fence_enable_sw_signaling);

/**
 * dma_fence_add_callback - add a callback to be called when the fence
 * is signaled
 * @fence: the fence to wait on
 * @cb: the callback to register
 * @func: the function to call
 *
 * @cb will be initialized by dma_fence_add_callback(); no initialization
 * by the caller is required. Any number of callbacks can be registered
 * to a fence, but a callback can only be registered to one fence at a time.
 *
 * Note that the callback can be called from an atomic context. If
 * fence is already signaled, this function will return -ENOENT (and
 * *not* call the callback).
 *
 * Add a software callback to the fence. The same restrictions apply to
 * refcount as for dma_fence_wait(), however the caller doesn't need to
 * keep a refcount to fence after dma_fence_add_callback() has returned:
 * when software access is enabled, the creator of the fence is required to keep
 * the fence alive until after it signals with dma_fence_signal(). The callback
 * itself can be called from irq context.
 *
 * Returns 0 in case of success, -ENOENT if the fence is already signaled
 * and -EINVAL in case of error.
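 *
 * A minimal sketch, with a hypothetical embedding structure and handler::
 *
 *     struct example_waiter {
 *             struct dma_fence_cb cb;
 *             struct example_device *edev;
 *     };
 *
 *     static void example_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 *     {
 *             struct example_waiter *w;
 *
 *             w = container_of(cb, struct example_waiter, cb);
 *             schedule_work(&w->edev->retire_work);
 *     }
 *
 * and registration that falls back to processing directly when the fence has
 * already signaled::
 *
 *     if (dma_fence_add_callback(fence, &w->cb, example_cb) == -ENOENT)
 *             example_cb(fence, &w->cb);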
 */
int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
                           dma_fence_func_t func)
{
        unsigned long flags;
        int ret = 0;

        if (WARN_ON(!fence || !func))
                return -EINVAL;

        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
                INIT_LIST_HEAD(&cb->node);
                return -ENOENT;
        }

        spin_lock_irqsave(fence->lock, flags);

        if (_dma_fence_enable_signaling(fence)) {
                cb->func = func;
                list_add_tail(&cb->node, &fence->cb_list);
        } else {
                INIT_LIST_HEAD(&cb->node);
                ret = -ENOENT;
        }

        spin_unlock_irqrestore(fence->lock, flags);

        return ret;
}
EXPORT_SYMBOL(dma_fence_add_callback);


/**
 * dma_fence_get_status - returns the status upon completion
 * @fence: the dma_fence to query
 *
 * This wraps dma_fence_get_status_locked() to return the error status
 * condition on a signaled fence. See dma_fence_get_status_locked() for more
 * details.
 *
 * Returns 0 if the fence has not yet been signaled, 1 if the fence has
 * been signaled without an error condition, or a negative error code
 * if the fence has completed with an error.
 */
int dma_fence_get_status(struct dma_fence *fence)
{
        unsigned long flags;
        int status;

        spin_lock_irqsave(fence->lock, flags);
        status = dma_fence_get_status_locked(fence);
        spin_unlock_irqrestore(fence->lock, flags);

        return status;
}
EXPORT_SYMBOL(dma_fence_get_status);

/**
 * dma_fence_remove_callback - remove a callback from the signaling list
 * @fence: the fence to wait on
 * @cb: the callback to remove
 *
 * Remove a previously queued callback from the fence. This function returns
 * true if the callback is successfully removed, or false if the fence has
 * already been signaled.
 *
 * *WARNING*:
 * Cancelling a callback should only be done if you really know what you're
 * doing, since deadlocks and race conditions could occur all too easily. For
 * this reason, it should only ever be done on hardware lockup recovery,
 * with a reference held to the fence.
 *
 * Behaviour is undefined if @cb has not been added to @fence using
 * dma_fence_add_callback() beforehand.
 */
bool dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        unsigned long flags;
        bool ret;

        spin_lock_irqsave(fence->lock, flags);

        ret = !list_empty(&cb->node);
        if (ret)
                list_del_init(&cb->node);

        spin_unlock_irqrestore(fence->lock, flags);

        return ret;
}
EXPORT_SYMBOL(dma_fence_remove_callback);

struct default_wait_cb {
        struct dma_fence_cb base;
        struct task_struct *task;
};

static void
dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct default_wait_cb *wait =
                container_of(cb, struct default_wait_cb, base);

        wake_up_state(wait->task, TASK_NORMAL);
}

/**
 * dma_fence_default_wait - default sleep until the fence gets signaled
 * or until timeout elapses
 * @fence: the fence to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
 * remaining timeout in jiffies on success. If timeout is zero the value one is
 * returned if the fence is already signaled for consistency with other
 * functions taking a jiffies timeout.
 */
signed long
dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
{
        struct default_wait_cb cb;
        unsigned long flags;
        signed long ret = timeout ? timeout : 1;

        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return ret;

        spin_lock_irqsave(fence->lock, flags);

        if (intr && signal_pending(current)) {
                ret = -ERESTARTSYS;
                goto out;
        }

        if (!_dma_fence_enable_signaling(fence))
                goto out;

        if (!timeout) {
                ret = 0;
                goto out;
        }

        cb.base.func = dma_fence_default_wait_cb;
        cb.task = current;
        list_add(&cb.base.node, &fence->cb_list);

        while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
               ret > 0) {
                if (intr)
                        __set_current_state(TASK_INTERRUPTIBLE);
                else
                        __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irqrestore(fence->lock, flags);

                ret = schedule_timeout(ret);

                spin_lock_irqsave(fence->lock, flags);
                if (ret > 0 && intr && signal_pending(current))
                        ret = -ERESTARTSYS;
        }

        if (!list_empty(&cb.base.node))
                list_del(&cb.base.node);
        __set_current_state(TASK_RUNNING);

out:
        spin_unlock_irqrestore(fence->lock, flags);
        return ret;
}
EXPORT_SYMBOL(dma_fence_default_wait);

static bool
dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
                            uint32_t *idx)
{
        int i;

        for (i = 0; i < count; ++i) {
                struct dma_fence *fence = fences[i];

                if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
                        if (idx)
                                *idx = i;
                        return true;
                }
        }
        return false;
}

/**
 * dma_fence_wait_any_timeout - sleep until any fence gets signaled
 * or until timeout elapses
 * @fences: array of fences to wait on
 * @count: number of fences to wait on
 * @intr: if true, do an interruptible wait
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 * @idx: used to store the first signaled fence index, meaningful only on
 *       positive return
 *
 * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
 * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
 * on success.
 *
 * Synchronously waits for the first fence in the array to be signaled. The
 * caller needs to hold a reference to all fences in the array, otherwise a
 * fence might be freed before return, resulting in undefined behavior.
 *
 * See also dma_fence_wait() and dma_fence_wait_timeout().
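 *
 * For example (sketch, with a hypothetical example_retire() helper)::
 *
 *     uint32_t first;
 *     signed long ret;
 *
 *     ret = dma_fence_wait_any_timeout(fences, count, false,
 *                                      MAX_SCHEDULE_TIMEOUT, &first);
 *     if (ret > 0)
 *             example_retire(fences[first]);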
 */
signed long
dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
                           bool intr, signed long timeout, uint32_t *idx)
{
        struct default_wait_cb *cb;
        signed long ret = timeout;
        unsigned i;

        if (WARN_ON(!fences || !count || timeout < 0))
                return -EINVAL;

        if (timeout == 0) {
                for (i = 0; i < count; ++i) {
                        if (dma_fence_is_signaled(fences[i])) {
                                if (idx)
                                        *idx = i;
                                return 1;
                        }
                }
                return 0;
        }

        cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
        if (cb == NULL) {
                ret = -ENOMEM;
                goto err_free_cb;
        }

        for (i = 0; i < count; ++i) {
                struct dma_fence *fence = fences[i];

                cb[i].task = current;
                if (dma_fence_add_callback(fence, &cb[i].base,
                                           dma_fence_default_wait_cb)) {
                        if (idx)
                                *idx = i;
                        goto fence_rm_cb;
                }
        }

        while (ret > 0) {
                if (intr)
                        set_current_state(TASK_INTERRUPTIBLE);
                else
                        set_current_state(TASK_UNINTERRUPTIBLE);

                if (dma_fence_test_signaled_any(fences, count, idx))
                        break;

                ret = schedule_timeout(ret);

                if (ret > 0 && intr && signal_pending(current))
                        ret = -ERESTARTSYS;
        }

        __set_current_state(TASK_RUNNING);

fence_rm_cb:
        while (i-- > 0)
                dma_fence_remove_callback(fences[i], &cb[i].base);

err_free_cb:
        kfree(cb);

        return ret;
}
EXPORT_SYMBOL(dma_fence_wait_any_timeout);

/**
 * dma_fence_init - Initialize a custom fence.
 * @fence: the fence to initialize
 * @ops: the dma_fence_ops for operations on this fence
 * @lock: the irqsafe spinlock to use for locking this fence
 * @context: the execution context this fence is run on
 * @seqno: a linear increasing sequence number for this context
 *
 * Initializes an allocated fence; the caller doesn't have to keep its
 * refcount after committing with this fence, but it will need to hold a
 * refcount again if &dma_fence_ops.enable_signaling gets called.
 *
 * context and seqno are used for easy comparison between fences, allowing
 * to check which fence is later by simply using dma_fence_later().
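 *
 * A sketch of typical driver usage (example_fence_ops and the per-ring
 * fence_lock/fence_context/fence_seqno fields are hypothetical)::
 *
 *     fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 *     if (!fence)
 *             return -ENOMEM;
 *
 *     dma_fence_init(fence, &example_fence_ops, &ring->fence_lock,
 *                    ring->fence_context, ++ring->fence_seqno);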
 */
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
               spinlock_t *lock, u64 context, u64 seqno)
{
        BUG_ON(!lock);
        BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);

        kref_init(&fence->refcount);
        fence->ops = ops;
        INIT_LIST_HEAD(&fence->cb_list);
        fence->lock = lock;
        fence->context = context;
        fence->seqno = seqno;
        fence->flags = 0UL;
        fence->error = 0;

        trace_dma_fence_init(fence);
}
EXPORT_SYMBOL(dma_fence_init);