1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_execbuf_util.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <uapi/drm/xe_drm.h>
17 #include <linux/ascii85.h>
18 #include <linux/delay.h>
19 #include <linux/kthread.h>
20 #include <linux/mm.h>
21 #include <linux/swap.h>
22
23 #include <generated/xe_wa_oob.h>
24
25 #include "regs/xe_gtt_defs.h"
26 #include "xe_assert.h"
27 #include "xe_bo.h"
28 #include "xe_device.h"
29 #include "xe_drm_client.h"
30 #include "xe_exec_queue.h"
31 #include "xe_gt_pagefault.h"
32 #include "xe_gt_tlb_invalidation.h"
33 #include "xe_migrate.h"
34 #include "xe_pat.h"
35 #include "xe_pm.h"
36 #include "xe_preempt_fence.h"
37 #include "xe_pt.h"
38 #include "xe_res_cursor.h"
39 #include "xe_sync.h"
40 #include "xe_trace_bo.h"
41 #include "xe_wa.h"
42 #include "xe_hmm.h"
43
44 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
45 {
46 return vm->gpuvm.r_obj;
47 }
48
49 /**
50 * xe_vma_userptr_check_repin() - Advisory check for repin needed
51 * @uvma: The userptr vma
52 *
53 * Check if the userptr vma has been invalidated since last successful
54 * repin. The check is advisory only and the function can be called
55 * without the vm->userptr.notifier_lock held. There is no guarantee that the
56 * vma userptr will remain valid after a lockless check, so typically
57 * the call needs to be followed by a proper check under the notifier_lock.
58 *
59 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
60 */
61 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
62 {
63 return mmu_interval_check_retry(&uvma->userptr.notifier,
64 uvma->userptr.notifier_seq) ?
65 -EAGAIN : 0;
66 }
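
/*
 * Illustrative sketch (not part of the driver): a caller holding vm->lock
 * typically does the lockless advisory check first, repins if needed, and
 * only trusts the result after a proper check under the notifier lock.
 * The helpers and locks named below are the ones in this file; the
 * surrounding flow is simplified:
 *
 *	if (xe_vma_userptr_check_repin(uvma)) {
 *		err = xe_vma_userptr_pin_pages(uvma);
 *		if (err)
 *			goto out;
 *	}
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (__xe_vm_userptr_needs_repin(vm)) {
 *		up_read(&vm->userptr.notifier_lock);
 *		goto retry;
 *	}
 *	... install bindings / fences ...
 *	up_read(&vm->userptr.notifier_lock);
 */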
67
68 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
69 {
70 struct xe_vma *vma = &uvma->vma;
71 struct xe_vm *vm = xe_vma_vm(vma);
72 struct xe_device *xe = vm->xe;
73
74 lockdep_assert_held(&vm->lock);
75 xe_assert(xe, xe_vma_is_userptr(vma));
76
77 return xe_hmm_userptr_populate_range(uvma, false);
78 }
79
80 static bool preempt_fences_waiting(struct xe_vm *vm)
81 {
82 struct xe_exec_queue *q;
83
84 lockdep_assert_held(&vm->lock);
85 xe_vm_assert_held(vm);
86
87 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
88 if (!q->lr.pfence ||
89 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
90 &q->lr.pfence->flags)) {
91 return true;
92 }
93 }
94
95 return false;
96 }
97
98 static void free_preempt_fences(struct list_head *list)
99 {
100 struct list_head *link, *next;
101
102 list_for_each_safe(link, next, list)
103 xe_preempt_fence_free(to_preempt_fence_from_link(link));
104 }
105
106 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
107 unsigned int *count)
108 {
109 lockdep_assert_held(&vm->lock);
110 xe_vm_assert_held(vm);
111
112 if (*count >= vm->preempt.num_exec_queues)
113 return 0;
114
115 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
116 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
117
118 if (IS_ERR(pfence))
119 return PTR_ERR(pfence);
120
121 list_move_tail(xe_preempt_fence_link(pfence), list);
122 }
123
124 return 0;
125 }
126
127 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
128 {
129 struct xe_exec_queue *q;
130
131 xe_vm_assert_held(vm);
132
133 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
134 if (q->lr.pfence) {
135 long timeout = dma_fence_wait(q->lr.pfence, false);
136
137 /* Only -ETIME on fence indicates VM needs to be killed */
138 if (timeout < 0 || q->lr.pfence->error == -ETIME)
139 return -ETIME;
140
141 dma_fence_put(q->lr.pfence);
142 q->lr.pfence = NULL;
143 }
144 }
145
146 return 0;
147 }
148
149 static bool xe_vm_is_idle(struct xe_vm *vm)
150 {
151 struct xe_exec_queue *q;
152
153 xe_vm_assert_held(vm);
154 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
155 if (!xe_exec_queue_is_idle(q))
156 return false;
157 }
158
159 return true;
160 }
161
162 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
163 {
164 struct list_head *link;
165 struct xe_exec_queue *q;
166
167 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
168 struct dma_fence *fence;
169
170 link = list->next;
171 xe_assert(vm->xe, link != list);
172
173 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
174 q, q->lr.context,
175 ++q->lr.seqno);
176 dma_fence_put(q->lr.pfence);
177 q->lr.pfence = fence;
178 }
179 }
180
181 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
182 {
183 struct xe_exec_queue *q;
184 int err;
185
186 xe_bo_assert_held(bo);
187
188 if (!vm->preempt.num_exec_queues)
189 return 0;
190
191 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
192 if (err)
193 return err;
194
195 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
196 if (q->lr.pfence) {
197 dma_resv_add_fence(bo->ttm.base.resv,
198 q->lr.pfence,
199 DMA_RESV_USAGE_BOOKKEEP);
200 }
201
202 return 0;
203 }
204
205 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
206 struct drm_exec *exec)
207 {
208 struct xe_exec_queue *q;
209
210 lockdep_assert_held(&vm->lock);
211 xe_vm_assert_held(vm);
212
213 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
214 q->ops->resume(q);
215
216 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
217 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
218 }
219 }
220
221 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
222 {
223 struct drm_gpuvm_exec vm_exec = {
224 .vm = &vm->gpuvm,
225 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
226 .num_fences = 1,
227 };
228 struct drm_exec *exec = &vm_exec.exec;
229 struct dma_fence *pfence;
230 int err;
231 bool wait;
232
233 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
234
235 down_write(&vm->lock);
236 err = drm_gpuvm_exec_lock(&vm_exec);
237 if (err)
238 goto out_up_write;
239
240 pfence = xe_preempt_fence_create(q, q->lr.context,
241 ++q->lr.seqno);
242 if (IS_ERR(pfence)) {
243 err = PTR_ERR(pfence);
244 goto out_fini;
245 }
246
247 list_add(&q->lr.link, &vm->preempt.exec_queues);
248 ++vm->preempt.num_exec_queues;
249 q->lr.pfence = pfence;
250
251 down_read(&vm->userptr.notifier_lock);
252
253 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
254 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
255
256 /*
257 * Check to see if a preemption on the VM or a userptr invalidation
258 * is in flight; if so, trigger this preempt fence to sync state with
259 * other preempt fences on the VM.
260 */
261 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
262 if (wait)
263 dma_fence_enable_sw_signaling(pfence);
264
265 up_read(&vm->userptr.notifier_lock);
266
267 out_fini:
268 drm_exec_fini(exec);
269 out_up_write:
270 up_write(&vm->lock);
271
272 return err;
273 }
274
275 /**
276 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
277 * @vm: The VM.
278 * @q: The exec_queue
279 *
280 * Note that this function might be called multiple times on the same queue.
281 */
282 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
283 {
284 if (!xe_vm_in_preempt_fence_mode(vm))
285 return;
286
287 down_write(&vm->lock);
288 if (!list_empty(&q->lr.link)) {
289 list_del_init(&q->lr.link);
290 --vm->preempt.num_exec_queues;
291 }
292 if (q->lr.pfence) {
293 dma_fence_enable_sw_signaling(q->lr.pfence);
294 dma_fence_put(q->lr.pfence);
295 q->lr.pfence = NULL;
296 }
297 up_write(&vm->lock);
298 }
299
300 /**
301 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
302 * that need repinning.
303 * @vm: The VM.
304 *
305 * This function checks whether the VM has userptrs that need repinning,
306 * and provides a release-type barrier on the userptr.notifier_lock after
307 * checking.
308 *
309 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
310 */
311 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
312 {
313 lockdep_assert_held_read(&vm->userptr.notifier_lock);
314
315 return (list_empty(&vm->userptr.repin_list) &&
316 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
317 }
318
319 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
320
321 /**
322 * xe_vm_kill() - VM Kill
323 * @vm: The VM.
324 * @unlocked: Flag indicating the VM's dma-resv is not held
325 *
326 * Kill the VM by setting the banned flag, indicating the VM is no longer
327 * available for use. If in preempt fence mode, also kill all attached exec queues.
328 */
329 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
330 {
331 struct xe_exec_queue *q;
332
333 lockdep_assert_held(&vm->lock);
334
335 if (unlocked)
336 xe_vm_lock(vm, false);
337
338 vm->flags |= XE_VM_FLAG_BANNED;
339 trace_xe_vm_kill(vm);
340
341 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
342 q->ops->kill(q);
343
344 if (unlocked)
345 xe_vm_unlock(vm);
346
347 /* TODO: Inform user the VM is banned */
348 }
349
350 /**
351 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
352 * @exec: The drm_exec object used for locking before validation.
353 * @err: The error returned from ttm_bo_validate().
354 * @end: A ktime_t cookie that should be set to 0 before first use and
355 * that should be reused on subsequent calls.
356 *
357 * With multiple active VMs, under memory pressure, it is possible that
358 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
359 * Until ttm properly handles locking in such scenarios, the best thing the
360 * driver can do is retry with a timeout. Check if that is necessary, and
361 * if so unlock the drm_exec's objects while keeping the ticket to prepare
362 * for a rerun.
363 *
364 * Return: true if a retry after drm_exec_init() is recommended;
365 * false otherwise.
366 */
367 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
368 {
369 ktime_t cur;
370
371 if (err != -ENOMEM)
372 return false;
373
374 cur = ktime_get();
375 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
376 if (!ktime_before(cur, *end))
377 return false;
378
379 msleep(20);
380 return true;
381 }
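
/*
 * Illustrative retry pattern (sketch only, mirroring in-tree callers such
 * as preempt_rebind_work_func() below; names and error handling are
 * simplified):
 *
 *	ktime_t end = 0;
 *
 *  retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = ...lock and validate objects...;
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */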
382
383 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
384 {
385 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
386 struct drm_gpuva *gpuva;
387 int ret;
388
389 lockdep_assert_held(&vm->lock);
390 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
391 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
392 &vm->rebind_list);
393
394 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
395 if (ret)
396 return ret;
397
398 vm_bo->evicted = false;
399 return 0;
400 }
401
402 /**
403 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
404 * @vm: The vm for which we are rebinding.
405 * @exec: The struct drm_exec with the locked GEM objects.
406 * @num_fences: The number of fences to reserve for the operation, not
407 * including rebinds and validations.
408 *
409 * Validates all evicted gem objects and rebinds their vmas. Note that
410 * rebindings may cause evictions and hence the validation-rebind
411 * sequence is rerun until there are no more objects to validate.
412 *
413 * Return: 0 on success, negative error code on error. In particular,
414 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
415 * the drm_exec transaction needs to be restarted.
416 */
417 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
418 unsigned int num_fences)
419 {
420 struct drm_gem_object *obj;
421 unsigned long index;
422 int ret;
423
424 do {
425 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
426 if (ret)
427 return ret;
428
429 ret = xe_vm_rebind(vm, false);
430 if (ret)
431 return ret;
432 } while (!list_empty(&vm->gpuvm.evict.list));
433
434 drm_exec_for_each_locked_object(exec, index, obj) {
435 ret = dma_resv_reserve_fences(obj->resv, num_fences);
436 if (ret)
437 return ret;
438 }
439
440 return 0;
441 }
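
/*
 * Illustrative sketch (not part of the driver): this helper is intended to
 * run inside a drm_exec locking loop, after the VM resv and external
 * objects have been prepared, with @num_fences covering the fences the
 * caller adds afterwards. See xe_preempt_work_begin() below for the
 * in-tree usage; the simplified shape is:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 */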
442
443 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
444 bool *done)
445 {
446 int err;
447
448 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
449 if (err)
450 return err;
451
452 if (xe_vm_is_idle(vm)) {
453 vm->preempt.rebind_deactivated = true;
454 *done = true;
455 return 0;
456 }
457
458 if (!preempt_fences_waiting(vm)) {
459 *done = true;
460 return 0;
461 }
462
463 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
464 if (err)
465 return err;
466
467 err = wait_for_existing_preempt_fences(vm);
468 if (err)
469 return err;
470
471 /*
472 * Add validation and rebinding to the locking loop since both can
473 * cause evictions which may require blocking dma_resv locks.
474 * The fence reservation here is intended for the new preempt fences
475 * we attach at the end of the rebind work.
476 */
477 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
478 }
479
480 static void preempt_rebind_work_func(struct work_struct *w)
481 {
482 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
483 struct drm_exec exec;
484 unsigned int fence_count = 0;
485 LIST_HEAD(preempt_fences);
486 ktime_t end = 0;
487 int err = 0;
488 long wait;
489 int __maybe_unused tries = 0;
490
491 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
492 trace_xe_vm_rebind_worker_enter(vm);
493
494 down_write(&vm->lock);
495
496 if (xe_vm_is_closed_or_banned(vm)) {
497 up_write(&vm->lock);
498 trace_xe_vm_rebind_worker_exit(vm);
499 return;
500 }
501
502 retry:
503 if (xe_vm_userptr_check_repin(vm)) {
504 err = xe_vm_userptr_pin(vm);
505 if (err)
506 goto out_unlock_outer;
507 }
508
509 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
510
511 drm_exec_until_all_locked(&exec) {
512 bool done = false;
513
514 err = xe_preempt_work_begin(&exec, vm, &done);
515 drm_exec_retry_on_contention(&exec);
516 if (err || done) {
517 drm_exec_fini(&exec);
518 if (err && xe_vm_validate_should_retry(&exec, err, &end))
519 err = -EAGAIN;
520
521 goto out_unlock_outer;
522 }
523 }
524
525 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
526 if (err)
527 goto out_unlock;
528
529 err = xe_vm_rebind(vm, true);
530 if (err)
531 goto out_unlock;
532
533 /* Wait on rebinds and munmap style VM unbinds */
534 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
535 DMA_RESV_USAGE_KERNEL,
536 false, MAX_SCHEDULE_TIMEOUT);
537 if (wait <= 0) {
538 err = -ETIME;
539 goto out_unlock;
540 }
541
542 #define retry_required(__tries, __vm) \
543 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
544 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
545 __xe_vm_userptr_needs_repin(__vm))
546
547 down_read(&vm->userptr.notifier_lock);
548 if (retry_required(tries, vm)) {
549 up_read(&vm->userptr.notifier_lock);
550 err = -EAGAIN;
551 goto out_unlock;
552 }
553
554 #undef retry_required
555
556 spin_lock(&vm->xe->ttm.lru_lock);
557 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
558 spin_unlock(&vm->xe->ttm.lru_lock);
559
560 /* Point of no return. */
561 arm_preempt_fences(vm, &preempt_fences);
562 resume_and_reinstall_preempt_fences(vm, &exec);
563 up_read(&vm->userptr.notifier_lock);
564
565 out_unlock:
566 drm_exec_fini(&exec);
567 out_unlock_outer:
568 if (err == -EAGAIN) {
569 trace_xe_vm_rebind_worker_retry(vm);
570 goto retry;
571 }
572
573 if (err) {
574 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
575 xe_vm_kill(vm, true);
576 }
577 up_write(&vm->lock);
578
579 free_preempt_fences(&preempt_fences);
580
581 trace_xe_vm_rebind_worker_exit(vm);
582 }
583
584 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
585 {
586 struct xe_userptr *userptr = &uvma->userptr;
587 struct xe_vma *vma = &uvma->vma;
588 struct dma_resv_iter cursor;
589 struct dma_fence *fence;
590 long err;
591
592 /*
593 * Tell exec and rebind worker they need to repin and rebind this
594 * userptr.
595 */
596 if (!xe_vm_in_fault_mode(vm) &&
597 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
598 spin_lock(&vm->userptr.invalidated_lock);
599 list_move_tail(&userptr->invalidate_link,
600 &vm->userptr.invalidated);
601 spin_unlock(&vm->userptr.invalidated_lock);
602 }
603
604 /*
605 * Preempt fences turn into schedule disables, pipeline these.
606 * Note that even in fault mode, we need to wait for binds and
607 * unbinds to complete, and those are attached as BOOKKEEP fences
608 * to the vm.
609 */
610 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
611 DMA_RESV_USAGE_BOOKKEEP);
612 dma_resv_for_each_fence_unlocked(&cursor, fence)
613 dma_fence_enable_sw_signaling(fence);
614 dma_resv_iter_end(&cursor);
615
616 err = dma_resv_wait_timeout(xe_vm_resv(vm),
617 DMA_RESV_USAGE_BOOKKEEP,
618 false, MAX_SCHEDULE_TIMEOUT);
619 XE_WARN_ON(err <= 0);
620
621 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
622 err = xe_vm_invalidate_vma(vma);
623 XE_WARN_ON(err);
624 }
625
626 xe_hmm_userptr_unmap(uvma);
627 }
628
629 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
630 const struct mmu_notifier_range *range,
631 unsigned long cur_seq)
632 {
633 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
634 struct xe_vma *vma = &uvma->vma;
635 struct xe_vm *vm = xe_vma_vm(vma);
636
637 xe_assert(vm->xe, xe_vma_is_userptr(vma));
638 trace_xe_vma_userptr_invalidate(vma);
639
640 if (!mmu_notifier_range_blockable(range))
641 return false;
642
643 vm_dbg(&xe_vma_vm(vma)->xe->drm,
644 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
645 xe_vma_start(vma), xe_vma_size(vma));
646
647 down_write(&vm->userptr.notifier_lock);
648 mmu_interval_set_seq(mni, cur_seq);
649
650 __vma_userptr_invalidate(vm, uvma);
651 up_write(&vm->userptr.notifier_lock);
652 trace_xe_vma_userptr_invalidate_complete(vma);
653
654 return true;
655 }
656
657 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
658 .invalidate = vma_userptr_invalidate,
659 };
660
661 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
662 /**
663 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
664 * @uvma: The userptr vma to invalidate
665 *
666 * Perform a forced userptr invalidation for testing purposes.
667 */
668 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
669 {
670 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
671
672 /* Protect against concurrent userptr pinning */
673 lockdep_assert_held(&vm->lock);
674 /* Protect against concurrent notifiers */
675 lockdep_assert_held(&vm->userptr.notifier_lock);
676 /*
677 * Protect against concurrent instances of this function and
678 * the critical exec sections
679 */
680 xe_vm_assert_held(vm);
681
682 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
683 uvma->userptr.notifier_seq))
684 uvma->userptr.notifier_seq -= 2;
685 __vma_userptr_invalidate(vm, uvma);
686 }
687 #endif
688
689 int xe_vm_userptr_pin(struct xe_vm *vm)
690 {
691 struct xe_userptr_vma *uvma, *next;
692 int err = 0;
693 LIST_HEAD(tmp_evict);
694
695 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
696 lockdep_assert_held_write(&vm->lock);
697
698 /* Collect invalidated userptrs */
699 spin_lock(&vm->userptr.invalidated_lock);
700 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
701 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
702 userptr.invalidate_link) {
703 list_del_init(&uvma->userptr.invalidate_link);
704 list_add_tail(&uvma->userptr.repin_link,
705 &vm->userptr.repin_list);
706 }
707 spin_unlock(&vm->userptr.invalidated_lock);
708
709 /* Pin and move to bind list */
710 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
711 userptr.repin_link) {
712 err = xe_vma_userptr_pin_pages(uvma);
713 if (err == -EFAULT) {
714 list_del_init(&uvma->userptr.repin_link);
715 /*
716 * We might have already done the pin once, but then
717 * had to retry before the re-bind happened, due to
718 * some other condition in the caller, but in the
719 * meantime the userptr got dinged by the notifier such
720 * that we need to revalidate here, but this time we hit
721 * the EFAULT. In such a case make sure we remove
722 * ourselves from the rebind list to avoid going down in
723 * flames.
724 */
725 if (!list_empty(&uvma->vma.combined_links.rebind))
726 list_del_init(&uvma->vma.combined_links.rebind);
727
728 /* Wait for pending binds */
729 xe_vm_lock(vm, false);
730 dma_resv_wait_timeout(xe_vm_resv(vm),
731 DMA_RESV_USAGE_BOOKKEEP,
732 false, MAX_SCHEDULE_TIMEOUT);
733
734 err = xe_vm_invalidate_vma(&uvma->vma);
735 xe_vm_unlock(vm);
736 if (err)
737 break;
738 } else {
739 if (err)
740 break;
741
742 list_del_init(&uvma->userptr.repin_link);
743 list_move_tail(&uvma->vma.combined_links.rebind,
744 &vm->rebind_list);
745 }
746 }
747
748 if (err) {
749 down_write(&vm->userptr.notifier_lock);
750 spin_lock(&vm->userptr.invalidated_lock);
751 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
752 userptr.repin_link) {
753 list_del_init(&uvma->userptr.repin_link);
754 list_move_tail(&uvma->userptr.invalidate_link,
755 &vm->userptr.invalidated);
756 }
757 spin_unlock(&vm->userptr.invalidated_lock);
758 up_write(&vm->userptr.notifier_lock);
759 }
760 return err;
761 }
762
763 /**
764 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
765 * that need repinning.
766 * @vm: The VM.
767 *
768 * This function does an advisory check for whether the VM has userptrs that
769 * need repinning.
770 *
771 * Return: 0 if there are no indications of userptrs needing repinning,
772 * -EAGAIN if there are.
773 */
774 int xe_vm_userptr_check_repin(struct xe_vm *vm)
775 {
776 return (list_empty_careful(&vm->userptr.repin_list) &&
777 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
778 }
779
780 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
781 {
782 int i;
783
784 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
785 if (!vops->pt_update_ops[i].num_ops)
786 continue;
787
788 vops->pt_update_ops[i].ops =
789 kmalloc_array(vops->pt_update_ops[i].num_ops,
790 sizeof(*vops->pt_update_ops[i].ops),
791 GFP_KERNEL);
792 if (!vops->pt_update_ops[i].ops)
793 return array_of_binds ? -ENOBUFS : -ENOMEM;
794 }
795
796 return 0;
797 }
798
799 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
800 {
801 int i;
802
803 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
804 kfree(vops->pt_update_ops[i].ops);
805 }
806
807 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
808 {
809 int i;
810
811 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
812 if (BIT(i) & tile_mask)
813 ++vops->pt_update_ops[i].num_ops;
814 }
815
816 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
817 u8 tile_mask)
818 {
819 INIT_LIST_HEAD(&op->link);
820 op->tile_mask = tile_mask;
821 op->base.op = DRM_GPUVA_OP_MAP;
822 op->base.map.va.addr = vma->gpuva.va.addr;
823 op->base.map.va.range = vma->gpuva.va.range;
824 op->base.map.gem.obj = vma->gpuva.gem.obj;
825 op->base.map.gem.offset = vma->gpuva.gem.offset;
826 op->map.vma = vma;
827 op->map.immediate = true;
828 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
829 op->map.is_null = xe_vma_is_null(vma);
830 }
831
832 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
833 u8 tile_mask)
834 {
835 struct xe_vma_op *op;
836
837 op = kzalloc(sizeof(*op), GFP_KERNEL);
838 if (!op)
839 return -ENOMEM;
840
841 xe_vm_populate_rebind(op, vma, tile_mask);
842 list_add_tail(&op->link, &vops->list);
843 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
844
845 return 0;
846 }
847
848 static struct dma_fence *ops_execute(struct xe_vm *vm,
849 struct xe_vma_ops *vops);
850 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
851 struct xe_exec_queue *q,
852 struct xe_sync_entry *syncs, u32 num_syncs);
853
854 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
855 {
856 struct dma_fence *fence;
857 struct xe_vma *vma, *next;
858 struct xe_vma_ops vops;
859 struct xe_vma_op *op, *next_op;
860 int err, i;
861
862 lockdep_assert_held(&vm->lock);
863 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
864 list_empty(&vm->rebind_list))
865 return 0;
866
867 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
868 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
869 vops.pt_update_ops[i].wait_vm_bookkeep = true;
870
871 xe_vm_assert_held(vm);
872 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
873 xe_assert(vm->xe, vma->tile_present);
874
875 if (rebind_worker)
876 trace_xe_vma_rebind_worker(vma);
877 else
878 trace_xe_vma_rebind_exec(vma);
879
880 err = xe_vm_ops_add_rebind(&vops, vma,
881 vma->tile_present);
882 if (err)
883 goto free_ops;
884 }
885
886 err = xe_vma_ops_alloc(&vops, false);
887 if (err)
888 goto free_ops;
889
890 fence = ops_execute(vm, &vops);
891 if (IS_ERR(fence)) {
892 err = PTR_ERR(fence);
893 } else {
894 dma_fence_put(fence);
895 list_for_each_entry_safe(vma, next, &vm->rebind_list,
896 combined_links.rebind)
897 list_del_init(&vma->combined_links.rebind);
898 }
899 free_ops:
900 list_for_each_entry_safe(op, next_op, &vops.list, link) {
901 list_del(&op->link);
902 kfree(op);
903 }
904 xe_vma_ops_fini(&vops);
905
906 return err;
907 }
908
909 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
910 {
911 struct dma_fence *fence = NULL;
912 struct xe_vma_ops vops;
913 struct xe_vma_op *op, *next_op;
914 struct xe_tile *tile;
915 u8 id;
916 int err;
917
918 lockdep_assert_held(&vm->lock);
919 xe_vm_assert_held(vm);
920 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
921
922 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
923 for_each_tile(tile, vm->xe, id) {
924 vops.pt_update_ops[id].wait_vm_bookkeep = true;
925 vops.pt_update_ops[tile->id].q =
926 xe_tile_migrate_exec_queue(tile);
927 }
928
929 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
930 if (err)
931 return ERR_PTR(err);
932
933 err = xe_vma_ops_alloc(&vops, false);
934 if (err) {
935 fence = ERR_PTR(err);
936 goto free_ops;
937 }
938
939 fence = ops_execute(vm, &vops);
940
941 free_ops:
942 list_for_each_entry_safe(op, next_op, &vops.list, link) {
943 list_del(&op->link);
944 kfree(op);
945 }
946 xe_vma_ops_fini(&vops);
947
948 return fence;
949 }
950
951 static void xe_vma_free(struct xe_vma *vma)
952 {
953 if (xe_vma_is_userptr(vma))
954 kfree(to_userptr_vma(vma));
955 else
956 kfree(vma);
957 }
958
959 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
960 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
961 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
962
963 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
964 struct xe_bo *bo,
965 u64 bo_offset_or_userptr,
966 u64 start, u64 end,
967 u16 pat_index, unsigned int flags)
968 {
969 struct xe_vma *vma;
970 struct xe_tile *tile;
971 u8 id;
972 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
973 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
974 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
975
976 xe_assert(vm->xe, start < end);
977 xe_assert(vm->xe, end < vm->size);
978
979 /*
980 * Allocate and ensure that the xe_vma_is_userptr() return
981 * matches what was allocated.
982 */
983 if (!bo && !is_null) {
984 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
985
986 if (!uvma)
987 return ERR_PTR(-ENOMEM);
988
989 vma = &uvma->vma;
990 } else {
991 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
992 if (!vma)
993 return ERR_PTR(-ENOMEM);
994
995 if (is_null)
996 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
997 if (bo)
998 vma->gpuva.gem.obj = &bo->ttm.base;
999 }
1000
1001 INIT_LIST_HEAD(&vma->combined_links.rebind);
1002
1003 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1004 vma->gpuva.vm = &vm->gpuvm;
1005 vma->gpuva.va.addr = start;
1006 vma->gpuva.va.range = end - start + 1;
1007 if (read_only)
1008 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1009 if (dumpable)
1010 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1011
1012 for_each_tile(tile, vm->xe, id)
1013 vma->tile_mask |= 0x1 << id;
1014
1015 if (vm->xe->info.has_atomic_enable_pte_bit)
1016 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1017
1018 vma->pat_index = pat_index;
1019
1020 if (bo) {
1021 struct drm_gpuvm_bo *vm_bo;
1022
1023 xe_bo_assert_held(bo);
1024
1025 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1026 if (IS_ERR(vm_bo)) {
1027 xe_vma_free(vma);
1028 return ERR_CAST(vm_bo);
1029 }
1030
1031 drm_gpuvm_bo_extobj_add(vm_bo);
1032 drm_gem_object_get(&bo->ttm.base);
1033 vma->gpuva.gem.offset = bo_offset_or_userptr;
1034 drm_gpuva_link(&vma->gpuva, vm_bo);
1035 drm_gpuvm_bo_put(vm_bo);
1036 } else /* userptr or null */ {
1037 if (!is_null) {
1038 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1039 u64 size = end - start + 1;
1040 int err;
1041
1042 INIT_LIST_HEAD(&userptr->invalidate_link);
1043 INIT_LIST_HEAD(&userptr->repin_link);
1044 vma->gpuva.gem.offset = bo_offset_or_userptr;
1045 mutex_init(&userptr->unmap_mutex);
1046
1047 err = mmu_interval_notifier_insert(&userptr->notifier,
1048 current->mm,
1049 xe_vma_userptr(vma), size,
1050 &vma_userptr_notifier_ops);
1051 if (err) {
1052 xe_vma_free(vma);
1053 return ERR_PTR(err);
1054 }
1055
1056 userptr->notifier_seq = LONG_MAX;
1057 }
1058
1059 xe_vm_get(vm);
1060 }
1061
1062 return vma;
1063 }
1064
1065 static void xe_vma_destroy_late(struct xe_vma *vma)
1066 {
1067 struct xe_vm *vm = xe_vma_vm(vma);
1068
1069 if (vma->ufence) {
1070 xe_sync_ufence_put(vma->ufence);
1071 vma->ufence = NULL;
1072 }
1073
1074 if (xe_vma_is_userptr(vma)) {
1075 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1076 struct xe_userptr *userptr = &uvma->userptr;
1077
1078 if (userptr->sg)
1079 xe_hmm_userptr_free_sg(uvma);
1080
1081 /*
1082 * Since userptr pages are not pinned, we can't remove
1083 * the notifier until we're sure the GPU is not accessing
1084 * them anymore
1085 */
1086 mmu_interval_notifier_remove(&userptr->notifier);
1087 mutex_destroy(&userptr->unmap_mutex);
1088 xe_vm_put(vm);
1089 } else if (xe_vma_is_null(vma)) {
1090 xe_vm_put(vm);
1091 } else {
1092 xe_bo_put(xe_vma_bo(vma));
1093 }
1094
1095 xe_vma_free(vma);
1096 }
1097
1098 static void vma_destroy_work_func(struct work_struct *w)
1099 {
1100 struct xe_vma *vma =
1101 container_of(w, struct xe_vma, destroy_work);
1102
1103 xe_vma_destroy_late(vma);
1104 }
1105
1106 static void vma_destroy_cb(struct dma_fence *fence,
1107 struct dma_fence_cb *cb)
1108 {
1109 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1110
1111 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1112 queue_work(system_unbound_wq, &vma->destroy_work);
1113 }
1114
1115 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1116 {
1117 struct xe_vm *vm = xe_vma_vm(vma);
1118
1119 lockdep_assert_held_write(&vm->lock);
1120 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1121
1122 if (xe_vma_is_userptr(vma)) {
1123 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1124
1125 spin_lock(&vm->userptr.invalidated_lock);
1126 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1127 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1128 spin_unlock(&vm->userptr.invalidated_lock);
1129 } else if (!xe_vma_is_null(vma)) {
1130 xe_bo_assert_held(xe_vma_bo(vma));
1131
1132 drm_gpuva_unlink(&vma->gpuva);
1133 }
1134
1135 xe_vm_assert_held(vm);
1136 if (fence) {
1137 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1138 vma_destroy_cb);
1139
1140 if (ret) {
1141 XE_WARN_ON(ret != -ENOENT);
1142 xe_vma_destroy_late(vma);
1143 }
1144 } else {
1145 xe_vma_destroy_late(vma);
1146 }
1147 }
1148
1149 /**
1150 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1151 * @exec: The drm_exec object we're currently locking for.
1152 * @vma: The vma for which we want to lock the vm resv and any attached
1153 * object's resv.
1154 *
1155 * Return: 0 on success, negative error code on error. In particular
1156 * may return -EDEADLK on WW transaction contention and -EINTR if
1157 * an interruptible wait is terminated by a signal.
1158 */
1159 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1160 {
1161 struct xe_vm *vm = xe_vma_vm(vma);
1162 struct xe_bo *bo = xe_vma_bo(vma);
1163 int err;
1164
1165 XE_WARN_ON(!vm);
1166
1167 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1168 if (!err && bo && !bo->vm)
1169 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1170
1171 return err;
1172 }
1173
1174 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1175 {
1176 struct drm_exec exec;
1177 int err;
1178
1179 drm_exec_init(&exec, 0, 0);
1180 drm_exec_until_all_locked(&exec) {
1181 err = xe_vm_lock_vma(&exec, vma);
1182 drm_exec_retry_on_contention(&exec);
1183 if (XE_WARN_ON(err))
1184 break;
1185 }
1186
1187 xe_vma_destroy(vma, NULL);
1188
1189 drm_exec_fini(&exec);
1190 }
1191
1192 struct xe_vma *
1193 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1194 {
1195 struct drm_gpuva *gpuva;
1196
1197 lockdep_assert_held(&vm->lock);
1198
1199 if (xe_vm_is_closed_or_banned(vm))
1200 return NULL;
1201
1202 xe_assert(vm->xe, start + range <= vm->size);
1203
1204 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1205
1206 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1207 }
1208
1209 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1210 {
1211 int err;
1212
1213 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1214 lockdep_assert_held(&vm->lock);
1215
1216 mutex_lock(&vm->snap_mutex);
1217 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1218 mutex_unlock(&vm->snap_mutex);
1219 XE_WARN_ON(err); /* Shouldn't be possible */
1220
1221 return err;
1222 }
1223
1224 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1225 {
1226 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1227 lockdep_assert_held(&vm->lock);
1228
1229 mutex_lock(&vm->snap_mutex);
1230 drm_gpuva_remove(&vma->gpuva);
1231 mutex_unlock(&vm->snap_mutex);
1232 if (vm->usm.last_fault_vma == vma)
1233 vm->usm.last_fault_vma = NULL;
1234 }
1235
1236 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1237 {
1238 struct xe_vma_op *op;
1239
1240 op = kzalloc(sizeof(*op), GFP_KERNEL);
1241
1242 if (unlikely(!op))
1243 return NULL;
1244
1245 return &op->base;
1246 }
1247
1248 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1249
1250 static const struct drm_gpuvm_ops gpuvm_ops = {
1251 .op_alloc = xe_vm_op_alloc,
1252 .vm_bo_validate = xe_gpuvm_validate,
1253 .vm_free = xe_vm_free,
1254 };
1255
1256 static u64 pde_encode_pat_index(u16 pat_index)
1257 {
1258 u64 pte = 0;
1259
1260 if (pat_index & BIT(0))
1261 pte |= XE_PPGTT_PTE_PAT0;
1262
1263 if (pat_index & BIT(1))
1264 pte |= XE_PPGTT_PTE_PAT1;
1265
1266 return pte;
1267 }
1268
1269 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1270 {
1271 u64 pte = 0;
1272
1273 if (pat_index & BIT(0))
1274 pte |= XE_PPGTT_PTE_PAT0;
1275
1276 if (pat_index & BIT(1))
1277 pte |= XE_PPGTT_PTE_PAT1;
1278
1279 if (pat_index & BIT(2)) {
1280 if (pt_level)
1281 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1282 else
1283 pte |= XE_PPGTT_PTE_PAT2;
1284 }
1285
1286 if (pat_index & BIT(3))
1287 pte |= XELPG_PPGTT_PTE_PAT3;
1288
1289 if (pat_index & (BIT(4)))
1290 pte |= XE2_PPGTT_PTE_PAT4;
1291
1292 return pte;
1293 }
1294
1295 static u64 pte_encode_ps(u32 pt_level)
1296 {
1297 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1298
1299 if (pt_level == 1)
1300 return XE_PDE_PS_2M;
1301 else if (pt_level == 2)
1302 return XE_PDPE_PS_1G;
1303
1304 return 0;
1305 }
1306
1307 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1308 const u16 pat_index)
1309 {
1310 u64 pde;
1311
1312 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1313 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1314 pde |= pde_encode_pat_index(pat_index);
1315
1316 return pde;
1317 }
1318
1319 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1320 u16 pat_index, u32 pt_level)
1321 {
1322 u64 pte;
1323
1324 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1325 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1326 pte |= pte_encode_pat_index(pat_index, pt_level);
1327 pte |= pte_encode_ps(pt_level);
1328
1329 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1330 pte |= XE_PPGTT_PTE_DM;
1331
1332 return pte;
1333 }
1334
1335 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1336 u16 pat_index, u32 pt_level)
1337 {
1338 pte |= XE_PAGE_PRESENT;
1339
1340 if (likely(!xe_vma_read_only(vma)))
1341 pte |= XE_PAGE_RW;
1342
1343 pte |= pte_encode_pat_index(pat_index, pt_level);
1344 pte |= pte_encode_ps(pt_level);
1345
1346 if (unlikely(xe_vma_is_null(vma)))
1347 pte |= XE_PTE_NULL;
1348
1349 return pte;
1350 }
1351
1352 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1353 u16 pat_index,
1354 u32 pt_level, bool devmem, u64 flags)
1355 {
1356 u64 pte;
1357
1358 /* Avoid passing random bits directly as flags */
1359 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1360
1361 pte = addr;
1362 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1363 pte |= pte_encode_pat_index(pat_index, pt_level);
1364 pte |= pte_encode_ps(pt_level);
1365
1366 if (devmem)
1367 pte |= XE_PPGTT_PTE_DM;
1368
1369 pte |= flags;
1370
1371 return pte;
1372 }
1373
1374 static const struct xe_pt_ops xelp_pt_ops = {
1375 .pte_encode_bo = xelp_pte_encode_bo,
1376 .pte_encode_vma = xelp_pte_encode_vma,
1377 .pte_encode_addr = xelp_pte_encode_addr,
1378 .pde_encode_bo = xelp_pde_encode_bo,
1379 };
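
/*
 * Worked example (derived from the helpers above, not a new code path):
 * encoding a 2 MiB device-memory PTE at pt_level == 1 with no extra flags
 * composes to
 *
 *	xelp_pte_encode_addr(xe, addr, pat_index, 1, true, 0) ==
 *		addr | XE_PAGE_PRESENT | XE_PAGE_RW |
 *		pte_encode_pat_index(pat_index, 1) | XE_PDE_PS_2M |
 *		XE_PPGTT_PTE_DM;
 */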
1380
1381 static void vm_destroy_work_func(struct work_struct *w);
1382
1383 /**
1384 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1385 * given tile and vm.
1386 * @xe: xe device.
1387 * @tile: tile to set up for.
1388 * @vm: vm to set up for.
1389 *
1390 * Sets up a pagetable tree with one page-table per level and a single
1391 * leaf PTE. All pagetable entries point to the single page-table or,
1392 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1393 * writes become NOPs.
1394 *
1395 * Return: 0 on success, negative error code on error.
1396 */
1397 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1398 struct xe_vm *vm)
1399 {
1400 u8 id = tile->id;
1401 int i;
1402
1403 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1404 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1405 if (IS_ERR(vm->scratch_pt[id][i])) {
1406 int err = PTR_ERR(vm->scratch_pt[id][i]);
1407
1408 vm->scratch_pt[id][i] = NULL;
1409 return err;
1410 }
1411
1412 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1413 }
1414
1415 return 0;
1416 }
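
/*
 * Illustrative layout of the scratch tree built above (sketch based on the
 * kernel-doc, per tile id): each scratch table's entries point at the
 * scratch table one level below, bottoming out in NULL huge PTEs:
 *
 *	scratch_pt[id][level - 1] -> ... -> scratch_pt[id][MAX_HUGEPTE_LEVEL]
 *	                                    (NULL huge PTEs: reads return 0,
 *	                                     writes become NOPs)
 */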
1417
1418 static void xe_vm_free_scratch(struct xe_vm *vm)
1419 {
1420 struct xe_tile *tile;
1421 u8 id;
1422
1423 if (!xe_vm_has_scratch(vm))
1424 return;
1425
1426 for_each_tile(tile, vm->xe, id) {
1427 u32 i;
1428
1429 if (!vm->pt_root[id])
1430 continue;
1431
1432 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1433 if (vm->scratch_pt[id][i])
1434 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1435 }
1436 }
1437
1438 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1439 {
1440 struct drm_gem_object *vm_resv_obj;
1441 struct xe_vm *vm;
1442 int err, number_tiles = 0;
1443 struct xe_tile *tile;
1444 u8 id;
1445
1446 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1447 if (!vm)
1448 return ERR_PTR(-ENOMEM);
1449
1450 vm->xe = xe;
1451
1452 vm->size = 1ull << xe->info.va_bits;
1453
1454 vm->flags = flags;
1455
1456 init_rwsem(&vm->lock);
1457 mutex_init(&vm->snap_mutex);
1458
1459 INIT_LIST_HEAD(&vm->rebind_list);
1460
1461 INIT_LIST_HEAD(&vm->userptr.repin_list);
1462 INIT_LIST_HEAD(&vm->userptr.invalidated);
1463 init_rwsem(&vm->userptr.notifier_lock);
1464 spin_lock_init(&vm->userptr.invalidated_lock);
1465
1466 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1467
1468 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1469
1470 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1471 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1472
1473 for_each_tile(tile, xe, id)
1474 xe_range_fence_tree_init(&vm->rftree[id]);
1475
1476 vm->pt_ops = &xelp_pt_ops;
1477
1478 /*
1479 * Long-running workloads are not protected by the scheduler references.
1480 * By design, run_job for long-running workloads returns NULL and the
1481 * scheduler drops all of its references, hence protecting the VM
1482 * for this case is necessary.
1483 */
1484 if (flags & XE_VM_FLAG_LR_MODE) {
1485 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1486 xe_pm_runtime_get_noresume(xe);
1487 }
1488
1489 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1490 if (!vm_resv_obj) {
1491 err = -ENOMEM;
1492 goto err_no_resv;
1493 }
1494
1495 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1496 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1497
1498 drm_gem_object_put(vm_resv_obj);
1499
1500 err = xe_vm_lock(vm, true);
1501 if (err)
1502 goto err_close;
1503
1504 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1505 vm->flags |= XE_VM_FLAG_64K;
1506
1507 for_each_tile(tile, xe, id) {
1508 if (flags & XE_VM_FLAG_MIGRATION &&
1509 tile->id != XE_VM_FLAG_TILE_ID(flags))
1510 continue;
1511
1512 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1513 if (IS_ERR(vm->pt_root[id])) {
1514 err = PTR_ERR(vm->pt_root[id]);
1515 vm->pt_root[id] = NULL;
1516 goto err_unlock_close;
1517 }
1518 }
1519
1520 if (xe_vm_has_scratch(vm)) {
1521 for_each_tile(tile, xe, id) {
1522 if (!vm->pt_root[id])
1523 continue;
1524
1525 err = xe_vm_create_scratch(xe, tile, vm);
1526 if (err)
1527 goto err_unlock_close;
1528 }
1529 vm->batch_invalidate_tlb = true;
1530 }
1531
1532 if (vm->flags & XE_VM_FLAG_LR_MODE)
1533 vm->batch_invalidate_tlb = false;
1534
1535 /* Fill pt_root after allocating scratch tables */
1536 for_each_tile(tile, xe, id) {
1537 if (!vm->pt_root[id])
1538 continue;
1539
1540 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1541 }
1542 xe_vm_unlock(vm);
1543
1544 /* Kernel migration VM shouldn't have a circular loop. */
1545 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1546 for_each_tile(tile, xe, id) {
1547 struct xe_exec_queue *q;
1548 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1549
1550 if (!vm->pt_root[id])
1551 continue;
1552
1553 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1554 if (IS_ERR(q)) {
1555 err = PTR_ERR(q);
1556 goto err_close;
1557 }
1558 vm->q[id] = q;
1559 number_tiles++;
1560 }
1561 }
1562
1563 if (number_tiles > 1)
1564 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1565
1566 trace_xe_vm_create(vm);
1567
1568 return vm;
1569
1570 err_unlock_close:
1571 xe_vm_unlock(vm);
1572 err_close:
1573 xe_vm_close_and_put(vm);
1574 return ERR_PTR(err);
1575
1576 err_no_resv:
1577 mutex_destroy(&vm->snap_mutex);
1578 for_each_tile(tile, xe, id)
1579 xe_range_fence_tree_fini(&vm->rftree[id]);
1580 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1581 kfree(vm);
1582 if (flags & XE_VM_FLAG_LR_MODE)
1583 xe_pm_runtime_put(xe);
1584 return ERR_PTR(err);
1585 }
1586
1587 static void xe_vm_close(struct xe_vm *vm)
1588 {
1589 struct xe_device *xe = vm->xe;
1590 bool bound;
1591 int idx;
1592
1593 bound = drm_dev_enter(&xe->drm, &idx);
1594
1595 down_write(&vm->lock);
1596
1597 vm->size = 0;
1598
1599 if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1600 struct xe_tile *tile;
1601 struct xe_gt *gt;
1602 u8 id;
1603
1604 /* Wait for pending binds */
1605 dma_resv_wait_timeout(xe_vm_resv(vm),
1606 DMA_RESV_USAGE_BOOKKEEP,
1607 false, MAX_SCHEDULE_TIMEOUT);
1608
1609 if (bound) {
1610 for_each_tile(tile, xe, id)
1611 if (vm->pt_root[id])
1612 xe_pt_clear(xe, vm->pt_root[id]);
1613
1614 for_each_gt(gt, xe, id)
1615 xe_gt_tlb_invalidation_vm(gt, vm);
1616 }
1617 }
1618
1619 up_write(&vm->lock);
1620
1621 if (bound)
1622 drm_dev_exit(idx);
1623 }
1624
1625 void xe_vm_close_and_put(struct xe_vm *vm)
1626 {
1627 LIST_HEAD(contested);
1628 struct xe_device *xe = vm->xe;
1629 struct xe_tile *tile;
1630 struct xe_vma *vma, *next_vma;
1631 struct drm_gpuva *gpuva, *next;
1632 u8 id;
1633
1634 xe_assert(xe, !vm->preempt.num_exec_queues);
1635
1636 xe_vm_close(vm);
1637 if (xe_vm_in_preempt_fence_mode(vm))
1638 flush_work(&vm->preempt.rebind_work);
1639
1640 down_write(&vm->lock);
1641 for_each_tile(tile, xe, id) {
1642 if (vm->q[id])
1643 xe_exec_queue_last_fence_put(vm->q[id], vm);
1644 }
1645 up_write(&vm->lock);
1646
1647 for_each_tile(tile, xe, id) {
1648 if (vm->q[id]) {
1649 xe_exec_queue_kill(vm->q[id]);
1650 xe_exec_queue_put(vm->q[id]);
1651 vm->q[id] = NULL;
1652 }
1653 }
1654
1655 down_write(&vm->lock);
1656 xe_vm_lock(vm, false);
1657 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1658 vma = gpuva_to_vma(gpuva);
1659
1660 if (xe_vma_has_no_bo(vma)) {
1661 down_read(&vm->userptr.notifier_lock);
1662 vma->gpuva.flags |= XE_VMA_DESTROYED;
1663 up_read(&vm->userptr.notifier_lock);
1664 }
1665
1666 xe_vm_remove_vma(vm, vma);
1667
1668 /* easy case, remove from VMA? */
1669 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1670 list_del_init(&vma->combined_links.rebind);
1671 xe_vma_destroy(vma, NULL);
1672 continue;
1673 }
1674
1675 list_move_tail(&vma->combined_links.destroy, &contested);
1676 vma->gpuva.flags |= XE_VMA_DESTROYED;
1677 }
1678
1679 /*
1680 * All vm operations will add shared fences to resv.
1681 * The only exception is eviction for a shared object,
1682 * but even so, the unbind when evicted would still
1683 * install a fence to resv. Hence it's safe to
1684 * destroy the pagetables immediately.
1685 */
1686 xe_vm_free_scratch(vm);
1687
1688 for_each_tile(tile, xe, id) {
1689 if (vm->pt_root[id]) {
1690 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1691 vm->pt_root[id] = NULL;
1692 }
1693 }
1694 xe_vm_unlock(vm);
1695
1696 /*
1697 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1698 * Since we hold a refcount to the bo, we can remove and free
1699 * the members safely without locking.
1700 */
1701 list_for_each_entry_safe(vma, next_vma, &contested,
1702 combined_links.destroy) {
1703 list_del_init(&vma->combined_links.destroy);
1704 xe_vma_destroy_unlocked(vma);
1705 }
1706
1707 up_write(&vm->lock);
1708
1709 down_write(&xe->usm.lock);
1710 if (vm->usm.asid) {
1711 void *lookup;
1712
1713 xe_assert(xe, xe->info.has_asid);
1714 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1715
1716 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1717 xe_assert(xe, lookup == vm);
1718 }
1719 up_write(&xe->usm.lock);
1720
1721 for_each_tile(tile, xe, id)
1722 xe_range_fence_tree_fini(&vm->rftree[id]);
1723
1724 xe_vm_put(vm);
1725 }
1726
1727 static void vm_destroy_work_func(struct work_struct *w)
1728 {
1729 struct xe_vm *vm =
1730 container_of(w, struct xe_vm, destroy_work);
1731 struct xe_device *xe = vm->xe;
1732 struct xe_tile *tile;
1733 u8 id;
1734
1735 /* xe_vm_close_and_put was not called? */
1736 xe_assert(xe, !vm->size);
1737
1738 if (xe_vm_in_preempt_fence_mode(vm))
1739 flush_work(&vm->preempt.rebind_work);
1740
1741 mutex_destroy(&vm->snap_mutex);
1742
1743 if (vm->flags & XE_VM_FLAG_LR_MODE)
1744 xe_pm_runtime_put(xe);
1745
1746 for_each_tile(tile, xe, id)
1747 XE_WARN_ON(vm->pt_root[id]);
1748
1749 trace_xe_vm_free(vm);
1750
1751 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1752
1753 if (vm->xef)
1754 xe_file_put(vm->xef);
1755
1756 kfree(vm);
1757 }
1758
1759 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1760 {
1761 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1762
1763 /* To destroy the VM we need to be able to sleep */
1764 queue_work(system_unbound_wq, &vm->destroy_work);
1765 }
1766
1767 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1768 {
1769 struct xe_vm *vm;
1770
1771 mutex_lock(&xef->vm.lock);
1772 vm = xa_load(&xef->vm.xa, id);
1773 if (vm)
1774 xe_vm_get(vm);
1775 mutex_unlock(&xef->vm.lock);
1776
1777 return vm;
1778 }
1779
1780 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1781 {
1782 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1783 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1784 }
1785
1786 static struct xe_exec_queue *
1787 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1788 {
1789 return q ? q : vm->q[0];
1790 }
1791
1792 static struct xe_user_fence *
1793 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1794 {
1795 unsigned int i;
1796
1797 for (i = 0; i < num_syncs; i++) {
1798 struct xe_sync_entry *e = &syncs[i];
1799
1800 if (xe_sync_is_ufence(e))
1801 return xe_sync_ufence_get(e);
1802 }
1803
1804 return NULL;
1805 }
1806
1807 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1808 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1809 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1810
1811 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1812 struct drm_file *file)
1813 {
1814 struct xe_device *xe = to_xe_device(dev);
1815 struct xe_file *xef = to_xe_file(file);
1816 struct drm_xe_vm_create *args = data;
1817 struct xe_tile *tile;
1818 struct xe_vm *vm;
1819 u32 id, asid;
1820 int err;
1821 u32 flags = 0;
1822
1823 if (XE_IOCTL_DBG(xe, args->extensions))
1824 return -EINVAL;
1825
1826 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1827 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1828
1829 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1830 !xe->info.has_usm))
1831 return -EINVAL;
1832
1833 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1834 return -EINVAL;
1835
1836 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1837 return -EINVAL;
1838
1839 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1840 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1841 return -EINVAL;
1842
1843 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1844 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1845 return -EINVAL;
1846
1847 if (XE_IOCTL_DBG(xe, args->extensions))
1848 return -EINVAL;
1849
1850 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1851 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1852 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1853 flags |= XE_VM_FLAG_LR_MODE;
1854 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1855 flags |= XE_VM_FLAG_FAULT_MODE;
1856
1857 vm = xe_vm_create(xe, flags);
1858 if (IS_ERR(vm))
1859 return PTR_ERR(vm);
1860
1861 if (xe->info.has_asid) {
1862 down_write(&xe->usm.lock);
1863 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1864 XA_LIMIT(1, XE_MAX_ASID - 1),
1865 &xe->usm.next_asid, GFP_KERNEL);
1866 up_write(&xe->usm.lock);
1867 if (err < 0)
1868 goto err_close_and_put;
1869
1870 vm->usm.asid = asid;
1871 }
1872
1873 vm->xef = xe_file_get(xef);
1874
1875 /* Record BO memory for VM pagetable created against client */
1876 for_each_tile(tile, xe, id)
1877 if (vm->pt_root[id])
1878 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1879
1880 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1881 /* Warning: Security issue - never enable by default */
1882 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1883 #endif
1884
1885 /* user id alloc must always be last in ioctl to prevent UAF */
1886 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1887 if (err)
1888 goto err_close_and_put;
1889
1890 args->vm_id = id;
1891
1892 return 0;
1893
1894 err_close_and_put:
1895 xe_vm_close_and_put(vm);
1896
1897 return err;
1898 }
1899
1900 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1901 struct drm_file *file)
1902 {
1903 struct xe_device *xe = to_xe_device(dev);
1904 struct xe_file *xef = to_xe_file(file);
1905 struct drm_xe_vm_destroy *args = data;
1906 struct xe_vm *vm;
1907 int err = 0;
1908
1909 if (XE_IOCTL_DBG(xe, args->pad) ||
1910 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1911 return -EINVAL;
1912
1913 mutex_lock(&xef->vm.lock);
1914 vm = xa_load(&xef->vm.xa, args->vm_id);
1915 if (XE_IOCTL_DBG(xe, !vm))
1916 err = -ENOENT;
1917 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1918 err = -EBUSY;
1919 else
1920 xa_erase(&xef->vm.xa, args->vm_id);
1921 mutex_unlock(&xef->vm.lock);
1922
1923 if (!err)
1924 xe_vm_close_and_put(vm);
1925
1926 return err;
1927 }
1928
1929 static const u32 region_to_mem_type[] = {
1930 XE_PL_TT,
1931 XE_PL_VRAM0,
1932 XE_PL_VRAM1,
1933 };
1934
1935 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1936 bool post_commit)
1937 {
1938 down_read(&vm->userptr.notifier_lock);
1939 vma->gpuva.flags |= XE_VMA_DESTROYED;
1940 up_read(&vm->userptr.notifier_lock);
1941 if (post_commit)
1942 xe_vm_remove_vma(vm, vma);
1943 }
1944
1945 #undef ULL
1946 #define ULL unsigned long long
1947
1948 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1949 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1950 {
1951 struct xe_vma *vma;
1952
1953 switch (op->op) {
1954 case DRM_GPUVA_OP_MAP:
1955 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1956 (ULL)op->map.va.addr, (ULL)op->map.va.range);
1957 break;
1958 case DRM_GPUVA_OP_REMAP:
1959 vma = gpuva_to_vma(op->remap.unmap->va);
1960 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1961 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1962 op->remap.unmap->keep ? 1 : 0);
1963 if (op->remap.prev)
1964 vm_dbg(&xe->drm,
1965 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1966 (ULL)op->remap.prev->va.addr,
1967 (ULL)op->remap.prev->va.range);
1968 if (op->remap.next)
1969 vm_dbg(&xe->drm,
1970 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
1971 (ULL)op->remap.next->va.addr,
1972 (ULL)op->remap.next->va.range);
1973 break;
1974 case DRM_GPUVA_OP_UNMAP:
1975 vma = gpuva_to_vma(op->unmap.va);
1976 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1977 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1978 op->unmap.keep ? 1 : 0);
1979 break;
1980 case DRM_GPUVA_OP_PREFETCH:
1981 vma = gpuva_to_vma(op->prefetch.va);
1982 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
1983 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
1984 break;
1985 default:
1986 drm_warn(&xe->drm, "NOT POSSIBLE");
1987 }
1988 }
1989 #else
1990 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1991 {
1992 }
1993 #endif
1994
1995 /*
1996  * Create the operations list from the IOCTL arguments and set up the operation
1997  * fields so that the parse and commit steps are decoupled from the IOCTL arguments. This step can fail.
1998 */
1999 static struct drm_gpuva_ops *
2000 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2001 u64 bo_offset_or_userptr, u64 addr, u64 range,
2002 u32 operation, u32 flags,
2003 u32 prefetch_region, u16 pat_index)
2004 {
2005 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2006 struct drm_gpuva_ops *ops;
2007 struct drm_gpuva_op *__op;
2008 struct drm_gpuvm_bo *vm_bo;
2009 int err;
2010
2011 lockdep_assert_held_write(&vm->lock);
2012
2013 vm_dbg(&vm->xe->drm,
2014 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2015 operation, (ULL)addr, (ULL)range,
2016 (ULL)bo_offset_or_userptr);
2017
2018 switch (operation) {
2019 case DRM_XE_VM_BIND_OP_MAP:
2020 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2021 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2022 obj, bo_offset_or_userptr);
2023 break;
2024 case DRM_XE_VM_BIND_OP_UNMAP:
2025 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2026 break;
2027 case DRM_XE_VM_BIND_OP_PREFETCH:
2028 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2029 break;
2030 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2031 xe_assert(vm->xe, bo);
2032
2033 err = xe_bo_lock(bo, true);
2034 if (err)
2035 return ERR_PTR(err);
2036
2037 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2038 if (IS_ERR(vm_bo)) {
2039 xe_bo_unlock(bo);
2040 return ERR_CAST(vm_bo);
2041 }
2042
2043 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2044 drm_gpuvm_bo_put(vm_bo);
2045 xe_bo_unlock(bo);
2046 break;
2047 default:
2048 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2049 ops = ERR_PTR(-EINVAL);
2050 }
2051 if (IS_ERR(ops))
2052 return ops;
2053
2054 drm_gpuva_for_each_op(__op, ops) {
2055 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2056
2057 if (__op->op == DRM_GPUVA_OP_MAP) {
2058 op->map.immediate =
2059 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2060 op->map.read_only =
2061 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2062 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2063 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2064 op->map.pat_index = pat_index;
2065 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2066 op->prefetch.region = prefetch_region;
2067 }
2068
2069 print_op(vm->xe, __op);
2070 }
2071
2072 return ops;
2073 }
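
/*
 * Illustrative sketch (not compiled in): how a single MAP bind typically
 * flows through the helpers in this file, mirroring what xe_vm_bind_ioctl()
 * does further below. Error unwinding and sync handling are omitted.
 *
 *	struct drm_gpuva_ops *ops;
 *	struct xe_vma_ops vops;
 *	int err;
 *
 *	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
 *	ops = vm_bind_ioctl_ops_create(vm, bo, obj_offset, addr, range,
 *				       DRM_XE_VM_BIND_OP_MAP, flags,
 *				       prefetch_region, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	err = xe_vma_ops_alloc(&vops, false);
 *	err = vm_bind_ioctl_ops_execute(vm, &vops);
 */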
2074
2075 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2076 u16 pat_index, unsigned int flags)
2077 {
2078 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2079 struct drm_exec exec;
2080 struct xe_vma *vma;
2081 int err = 0;
2082
2083 lockdep_assert_held_write(&vm->lock);
2084
2085 if (bo) {
2086 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2087 drm_exec_until_all_locked(&exec) {
2088 err = 0;
2089 if (!bo->vm) {
2090 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2091 drm_exec_retry_on_contention(&exec);
2092 }
2093 if (!err) {
2094 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2095 drm_exec_retry_on_contention(&exec);
2096 }
2097 if (err) {
2098 drm_exec_fini(&exec);
2099 return ERR_PTR(err);
2100 }
2101 }
2102 }
2103 vma = xe_vma_create(vm, bo, op->gem.offset,
2104 op->va.addr, op->va.addr +
2105 op->va.range - 1, pat_index, flags);
2106 if (IS_ERR(vma))
2107 goto err_unlock;
2108
2109 if (xe_vma_is_userptr(vma))
2110 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2111 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2112 err = add_preempt_fences(vm, bo);
2113
2114 err_unlock:
2115 if (bo)
2116 drm_exec_fini(&exec);
2117
2118 if (err) {
2119 prep_vma_destroy(vm, vma, false);
2120 xe_vma_destroy_unlocked(vma);
2121 vma = ERR_PTR(err);
2122 }
2123
2124 return vma;
2125 }
2126
2127 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2128 {
2129 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2130 return SZ_1G;
2131 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2132 return SZ_2M;
2133 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2134 return SZ_64K;
2135 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2136 return SZ_4K;
2137
2138 	return SZ_1G;	/* Uninitialized, use max size */
2139 }
2140
2141 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2142 {
2143 switch (size) {
2144 case SZ_1G:
2145 vma->gpuva.flags |= XE_VMA_PTE_1G;
2146 break;
2147 case SZ_2M:
2148 vma->gpuva.flags |= XE_VMA_PTE_2M;
2149 break;
2150 case SZ_64K:
2151 vma->gpuva.flags |= XE_VMA_PTE_64K;
2152 break;
2153 case SZ_4K:
2154 vma->gpuva.flags |= XE_VMA_PTE_4K;
2155 break;
2156 }
2157 }
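
/*
 * Worked example (hedged, the addresses are illustrative only) of how these
 * PTE size helpers feed the REMAP skip logic in vm_bind_ioctl_ops_parse():
 * if an old BO-backed VMA at [0x0, 0x400000) was mapped with 2M pages
 * (XE_VMA_PTE_2M) and an unmap of [0x200000, 0x400000) splits it, the
 * surviving prev piece ends on a 2M boundary, so skip_prev is set, its
 * existing page-table entries are kept and no rebind is scheduled for it.
 * A userptr VMA never takes this shortcut because the split creates a new
 * SG mapping that must be rebound.
 */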
2158
2159 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2160 {
2161 int err = 0;
2162
2163 lockdep_assert_held_write(&vm->lock);
2164
2165 switch (op->base.op) {
2166 case DRM_GPUVA_OP_MAP:
2167 err |= xe_vm_insert_vma(vm, op->map.vma);
2168 if (!err)
2169 op->flags |= XE_VMA_OP_COMMITTED;
2170 break;
2171 case DRM_GPUVA_OP_REMAP:
2172 {
2173 u8 tile_present =
2174 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2175
2176 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2177 true);
2178 op->flags |= XE_VMA_OP_COMMITTED;
2179
2180 if (op->remap.prev) {
2181 err |= xe_vm_insert_vma(vm, op->remap.prev);
2182 if (!err)
2183 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2184 if (!err && op->remap.skip_prev) {
2185 op->remap.prev->tile_present =
2186 tile_present;
2187 op->remap.prev = NULL;
2188 }
2189 }
2190 if (op->remap.next) {
2191 err |= xe_vm_insert_vma(vm, op->remap.next);
2192 if (!err)
2193 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2194 if (!err && op->remap.skip_next) {
2195 op->remap.next->tile_present =
2196 tile_present;
2197 op->remap.next = NULL;
2198 }
2199 }
2200
2201 		/* Adjust for partial unbind after removing VMA from VM */
2202 if (!err) {
2203 op->base.remap.unmap->va->va.addr = op->remap.start;
2204 op->base.remap.unmap->va->va.range = op->remap.range;
2205 }
2206 break;
2207 }
2208 case DRM_GPUVA_OP_UNMAP:
2209 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2210 op->flags |= XE_VMA_OP_COMMITTED;
2211 break;
2212 case DRM_GPUVA_OP_PREFETCH:
2213 op->flags |= XE_VMA_OP_COMMITTED;
2214 break;
2215 default:
2216 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2217 }
2218
2219 return err;
2220 }
2221
2222 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2223 struct xe_vma_ops *vops)
2224 {
2225 struct xe_device *xe = vm->xe;
2226 struct drm_gpuva_op *__op;
2227 struct xe_tile *tile;
2228 u8 id, tile_mask = 0;
2229 int err = 0;
2230
2231 lockdep_assert_held_write(&vm->lock);
2232
2233 for_each_tile(tile, vm->xe, id)
2234 tile_mask |= 0x1 << id;
2235
2236 drm_gpuva_for_each_op(__op, ops) {
2237 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2238 struct xe_vma *vma;
2239 unsigned int flags = 0;
2240
2241 INIT_LIST_HEAD(&op->link);
2242 list_add_tail(&op->link, &vops->list);
2243 op->tile_mask = tile_mask;
2244
2245 switch (op->base.op) {
2246 case DRM_GPUVA_OP_MAP:
2247 {
2248 flags |= op->map.read_only ?
2249 VMA_CREATE_FLAG_READ_ONLY : 0;
2250 flags |= op->map.is_null ?
2251 VMA_CREATE_FLAG_IS_NULL : 0;
2252 flags |= op->map.dumpable ?
2253 VMA_CREATE_FLAG_DUMPABLE : 0;
2254
2255 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2256 flags);
2257 if (IS_ERR(vma))
2258 return PTR_ERR(vma);
2259
2260 op->map.vma = vma;
2261 if (op->map.immediate || !xe_vm_in_fault_mode(vm))
2262 xe_vma_ops_incr_pt_update_ops(vops,
2263 op->tile_mask);
2264 break;
2265 }
2266 case DRM_GPUVA_OP_REMAP:
2267 {
2268 struct xe_vma *old =
2269 gpuva_to_vma(op->base.remap.unmap->va);
2270
2271 op->remap.start = xe_vma_start(old);
2272 op->remap.range = xe_vma_size(old);
2273
2274 if (op->base.remap.prev) {
2275 flags |= op->base.remap.unmap->va->flags &
2276 XE_VMA_READ_ONLY ?
2277 VMA_CREATE_FLAG_READ_ONLY : 0;
2278 flags |= op->base.remap.unmap->va->flags &
2279 DRM_GPUVA_SPARSE ?
2280 VMA_CREATE_FLAG_IS_NULL : 0;
2281 flags |= op->base.remap.unmap->va->flags &
2282 XE_VMA_DUMPABLE ?
2283 VMA_CREATE_FLAG_DUMPABLE : 0;
2284
2285 vma = new_vma(vm, op->base.remap.prev,
2286 old->pat_index, flags);
2287 if (IS_ERR(vma))
2288 return PTR_ERR(vma);
2289
2290 op->remap.prev = vma;
2291
2292 /*
2293 * Userptr creates a new SG mapping so
2294 * we must also rebind.
2295 */
2296 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2297 IS_ALIGNED(xe_vma_end(vma),
2298 xe_vma_max_pte_size(old));
2299 if (op->remap.skip_prev) {
2300 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2301 op->remap.range -=
2302 xe_vma_end(vma) -
2303 xe_vma_start(old);
2304 op->remap.start = xe_vma_end(vma);
2305 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2306 (ULL)op->remap.start,
2307 (ULL)op->remap.range);
2308 } else {
2309 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2310 }
2311 }
2312
2313 if (op->base.remap.next) {
2314 flags |= op->base.remap.unmap->va->flags &
2315 XE_VMA_READ_ONLY ?
2316 VMA_CREATE_FLAG_READ_ONLY : 0;
2317 flags |= op->base.remap.unmap->va->flags &
2318 DRM_GPUVA_SPARSE ?
2319 VMA_CREATE_FLAG_IS_NULL : 0;
2320 flags |= op->base.remap.unmap->va->flags &
2321 XE_VMA_DUMPABLE ?
2322 VMA_CREATE_FLAG_DUMPABLE : 0;
2323
2324 vma = new_vma(vm, op->base.remap.next,
2325 old->pat_index, flags);
2326 if (IS_ERR(vma))
2327 return PTR_ERR(vma);
2328
2329 op->remap.next = vma;
2330
2331 /*
2332 * Userptr creates a new SG mapping so
2333 * we must also rebind.
2334 */
2335 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2336 IS_ALIGNED(xe_vma_start(vma),
2337 xe_vma_max_pte_size(old));
2338 if (op->remap.skip_next) {
2339 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2340 op->remap.range -=
2341 xe_vma_end(old) -
2342 xe_vma_start(vma);
2343 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2344 (ULL)op->remap.start,
2345 (ULL)op->remap.range);
2346 } else {
2347 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2348 }
2349 }
2350 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2351 break;
2352 }
2353 case DRM_GPUVA_OP_UNMAP:
2354 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2355 break;
2356 case DRM_GPUVA_OP_PREFETCH:
2357 vma = gpuva_to_vma(op->base.prefetch.va);
2358
2359 if (xe_vma_is_userptr(vma)) {
2360 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2361 if (err)
2362 return err;
2363 }
2364
2365 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2366 break;
2367 default:
2368 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2369 }
2370
2371 err = xe_vma_op_commit(vm, op);
2372 if (err)
2373 return err;
2374 }
2375
2376 return 0;
2377 }
2378
2379 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2380 bool post_commit, bool prev_post_commit,
2381 bool next_post_commit)
2382 {
2383 lockdep_assert_held_write(&vm->lock);
2384
2385 switch (op->base.op) {
2386 case DRM_GPUVA_OP_MAP:
2387 if (op->map.vma) {
2388 prep_vma_destroy(vm, op->map.vma, post_commit);
2389 xe_vma_destroy_unlocked(op->map.vma);
2390 }
2391 break;
2392 case DRM_GPUVA_OP_UNMAP:
2393 {
2394 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2395
2396 if (vma) {
2397 down_read(&vm->userptr.notifier_lock);
2398 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2399 up_read(&vm->userptr.notifier_lock);
2400 if (post_commit)
2401 xe_vm_insert_vma(vm, vma);
2402 }
2403 break;
2404 }
2405 case DRM_GPUVA_OP_REMAP:
2406 {
2407 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2408
2409 if (op->remap.prev) {
2410 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2411 xe_vma_destroy_unlocked(op->remap.prev);
2412 }
2413 if (op->remap.next) {
2414 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2415 xe_vma_destroy_unlocked(op->remap.next);
2416 }
2417 if (vma) {
2418 down_read(&vm->userptr.notifier_lock);
2419 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2420 up_read(&vm->userptr.notifier_lock);
2421 if (post_commit)
2422 xe_vm_insert_vma(vm, vma);
2423 }
2424 break;
2425 }
2426 case DRM_GPUVA_OP_PREFETCH:
2427 /* Nothing to do */
2428 break;
2429 default:
2430 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2431 }
2432 }
2433
2434 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2435 struct drm_gpuva_ops **ops,
2436 int num_ops_list)
2437 {
2438 int i;
2439
2440 for (i = num_ops_list - 1; i >= 0; --i) {
2441 struct drm_gpuva_ops *__ops = ops[i];
2442 struct drm_gpuva_op *__op;
2443
2444 if (!__ops)
2445 continue;
2446
2447 drm_gpuva_for_each_op_reverse(__op, __ops) {
2448 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2449
2450 xe_vma_op_unwind(vm, op,
2451 op->flags & XE_VMA_OP_COMMITTED,
2452 op->flags & XE_VMA_OP_PREV_COMMITTED,
2453 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2454 }
2455 }
2456 }
2457
2458 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2459 bool validate)
2460 {
2461 struct xe_bo *bo = xe_vma_bo(vma);
2462 int err = 0;
2463
2464 if (bo) {
2465 if (!bo->vm)
2466 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2467 if (!err && validate)
2468 err = xe_bo_validate(bo, xe_vma_vm(vma), true);
2469 }
2470
2471 return err;
2472 }
2473
2474 static int check_ufence(struct xe_vma *vma)
2475 {
2476 if (vma->ufence) {
2477 struct xe_user_fence * const f = vma->ufence;
2478
2479 if (!xe_sync_ufence_get_status(f))
2480 return -EBUSY;
2481
2482 vma->ufence = NULL;
2483 xe_sync_ufence_put(f);
2484 }
2485
2486 return 0;
2487 }
2488
2489 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2490 struct xe_vma_op *op)
2491 {
2492 int err = 0;
2493
2494 switch (op->base.op) {
2495 case DRM_GPUVA_OP_MAP:
2496 err = vma_lock_and_validate(exec, op->map.vma,
2497 !xe_vm_in_fault_mode(vm) ||
2498 op->map.immediate);
2499 break;
2500 case DRM_GPUVA_OP_REMAP:
2501 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2502 if (err)
2503 break;
2504
2505 err = vma_lock_and_validate(exec,
2506 gpuva_to_vma(op->base.remap.unmap->va),
2507 false);
2508 if (!err && op->remap.prev)
2509 err = vma_lock_and_validate(exec, op->remap.prev, true);
2510 if (!err && op->remap.next)
2511 err = vma_lock_and_validate(exec, op->remap.next, true);
2512 break;
2513 case DRM_GPUVA_OP_UNMAP:
2514 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2515 if (err)
2516 break;
2517
2518 err = vma_lock_and_validate(exec,
2519 gpuva_to_vma(op->base.unmap.va),
2520 false);
2521 break;
2522 case DRM_GPUVA_OP_PREFETCH:
2523 {
2524 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2525 u32 region = op->prefetch.region;
2526
2527 		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2528
2529 err = vma_lock_and_validate(exec,
2530 gpuva_to_vma(op->base.prefetch.va),
2531 false);
2532 if (!err && !xe_vma_has_no_bo(vma))
2533 err = xe_bo_migrate(xe_vma_bo(vma),
2534 region_to_mem_type[region]);
2535 break;
2536 }
2537 default:
2538 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2539 }
2540
2541 return err;
2542 }
2543
2544 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2545 struct xe_vm *vm,
2546 struct xe_vma_ops *vops)
2547 {
2548 struct xe_vma_op *op;
2549 int err;
2550
2551 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2552 if (err)
2553 return err;
2554
2555 list_for_each_entry(op, &vops->list, link) {
2556 err = op_lock_and_prep(exec, vm, op);
2557 if (err)
2558 return err;
2559 }
2560
2561 #ifdef TEST_VM_OPS_ERROR
2562 if (vops->inject_error &&
2563 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2564 return -ENOSPC;
2565 #endif
2566
2567 return 0;
2568 }
2569
2570 static void op_trace(struct xe_vma_op *op)
2571 {
2572 switch (op->base.op) {
2573 case DRM_GPUVA_OP_MAP:
2574 trace_xe_vma_bind(op->map.vma);
2575 break;
2576 case DRM_GPUVA_OP_REMAP:
2577 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2578 if (op->remap.prev)
2579 trace_xe_vma_bind(op->remap.prev);
2580 if (op->remap.next)
2581 trace_xe_vma_bind(op->remap.next);
2582 break;
2583 case DRM_GPUVA_OP_UNMAP:
2584 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2585 break;
2586 case DRM_GPUVA_OP_PREFETCH:
2587 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2588 break;
2589 default:
2590 XE_WARN_ON("NOT POSSIBLE");
2591 }
2592 }
2593
2594 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2595 {
2596 struct xe_vma_op *op;
2597
2598 list_for_each_entry(op, &vops->list, link)
2599 op_trace(op);
2600 }
2601
2602 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2603 {
2604 struct xe_exec_queue *q = vops->q;
2605 struct xe_tile *tile;
2606 int number_tiles = 0;
2607 u8 id;
2608
2609 for_each_tile(tile, vm->xe, id) {
2610 if (vops->pt_update_ops[id].num_ops)
2611 ++number_tiles;
2612
2613 if (vops->pt_update_ops[id].q)
2614 continue;
2615
2616 if (q) {
2617 vops->pt_update_ops[id].q = q;
2618 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2619 q = list_next_entry(q, multi_gt_list);
2620 } else {
2621 vops->pt_update_ops[id].q = vm->q[id];
2622 }
2623 }
2624
2625 return number_tiles;
2626 }
2627
2628 static struct dma_fence *ops_execute(struct xe_vm *vm,
2629 struct xe_vma_ops *vops)
2630 {
2631 struct xe_tile *tile;
2632 struct dma_fence *fence = NULL;
2633 struct dma_fence **fences = NULL;
2634 struct dma_fence_array *cf = NULL;
2635 int number_tiles = 0, current_fence = 0, err;
2636 u8 id;
2637
2638 number_tiles = vm_ops_setup_tile_args(vm, vops);
2639 if (number_tiles == 0)
2640 return ERR_PTR(-ENODATA);
2641
2642 if (number_tiles > 1) {
2643 fences = kmalloc_array(number_tiles, sizeof(*fences),
2644 GFP_KERNEL);
2645 if (!fences) {
2646 fence = ERR_PTR(-ENOMEM);
2647 goto err_trace;
2648 }
2649 }
2650
2651 for_each_tile(tile, vm->xe, id) {
2652 if (!vops->pt_update_ops[id].num_ops)
2653 continue;
2654
2655 err = xe_pt_update_ops_prepare(tile, vops);
2656 if (err) {
2657 fence = ERR_PTR(err);
2658 goto err_out;
2659 }
2660 }
2661
2662 trace_xe_vm_ops_execute(vops);
2663
2664 for_each_tile(tile, vm->xe, id) {
2665 if (!vops->pt_update_ops[id].num_ops)
2666 continue;
2667
2668 fence = xe_pt_update_ops_run(tile, vops);
2669 if (IS_ERR(fence))
2670 goto err_out;
2671
2672 if (fences)
2673 fences[current_fence++] = fence;
2674 }
2675
2676 if (fences) {
2677 cf = dma_fence_array_create(number_tiles, fences,
2678 vm->composite_fence_ctx,
2679 vm->composite_fence_seqno++,
2680 false);
2681 if (!cf) {
2682 --vm->composite_fence_seqno;
2683 fence = ERR_PTR(-ENOMEM);
2684 goto err_out;
2685 }
2686 fence = &cf->base;
2687 }
2688
2689 for_each_tile(tile, vm->xe, id) {
2690 if (!vops->pt_update_ops[id].num_ops)
2691 continue;
2692
2693 xe_pt_update_ops_fini(tile, vops);
2694 }
2695
2696 return fence;
2697
2698 err_out:
2699 for_each_tile(tile, vm->xe, id) {
2700 if (!vops->pt_update_ops[id].num_ops)
2701 continue;
2702
2703 xe_pt_update_ops_abort(tile, vops);
2704 }
2705 while (current_fence)
2706 dma_fence_put(fences[--current_fence]);
2707 kfree(fences);
2708 kfree(cf);
2709
2710 err_trace:
2711 trace_xe_vm_ops_fail(vm);
2712 return fence;
2713 }
2714
2715 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2716 {
2717 if (vma->ufence)
2718 xe_sync_ufence_put(vma->ufence);
2719 vma->ufence = __xe_sync_ufence_get(ufence);
2720 }
2721
2722 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2723 struct xe_user_fence *ufence)
2724 {
2725 switch (op->base.op) {
2726 case DRM_GPUVA_OP_MAP:
2727 vma_add_ufence(op->map.vma, ufence);
2728 break;
2729 case DRM_GPUVA_OP_REMAP:
2730 if (op->remap.prev)
2731 vma_add_ufence(op->remap.prev, ufence);
2732 if (op->remap.next)
2733 vma_add_ufence(op->remap.next, ufence);
2734 break;
2735 case DRM_GPUVA_OP_UNMAP:
2736 break;
2737 case DRM_GPUVA_OP_PREFETCH:
2738 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2739 break;
2740 default:
2741 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2742 }
2743 }
2744
2745 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
2746 struct dma_fence *fence)
2747 {
2748 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
2749 struct xe_user_fence *ufence;
2750 struct xe_vma_op *op;
2751 int i;
2752
2753 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
2754 list_for_each_entry(op, &vops->list, link) {
2755 if (ufence)
2756 op_add_ufence(vm, op, ufence);
2757
2758 if (op->base.op == DRM_GPUVA_OP_UNMAP)
2759 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
2760 else if (op->base.op == DRM_GPUVA_OP_REMAP)
2761 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
2762 fence);
2763 }
2764 if (ufence)
2765 xe_sync_ufence_put(ufence);
2766 for (i = 0; i < vops->num_syncs; i++)
2767 xe_sync_entry_signal(vops->syncs + i, fence);
2768 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
2769 dma_fence_put(fence);
2770 }
2771
2772 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2773 struct xe_vma_ops *vops)
2774 {
2775 struct drm_exec exec;
2776 struct dma_fence *fence;
2777 int err;
2778
2779 lockdep_assert_held_write(&vm->lock);
2780
2781 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
2782 DRM_EXEC_IGNORE_DUPLICATES, 0);
2783 drm_exec_until_all_locked(&exec) {
2784 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
2785 drm_exec_retry_on_contention(&exec);
2786 if (err)
2787 goto unlock;
2788
2789 fence = ops_execute(vm, vops);
2790 if (IS_ERR(fence)) {
2791 err = PTR_ERR(fence);
2792 goto unlock;
2793 }
2794
2795 vm_bind_ioctl_ops_fini(vm, vops, fence);
2796 }
2797
2798 unlock:
2799 drm_exec_fini(&exec);
2800 return err;
2801 }
2802
2803 #define SUPPORTED_FLAGS_STUB \
2804 (DRM_XE_VM_BIND_FLAG_READONLY | \
2805 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2806 DRM_XE_VM_BIND_FLAG_NULL | \
2807 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2808
2809 #ifdef TEST_VM_OPS_ERROR
2810 #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
2811 #else
2812 #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
2813 #endif
2814
2815 #define XE_64K_PAGE_MASK 0xffffull
2816 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2817
2818 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2819 struct drm_xe_vm_bind *args,
2820 struct drm_xe_vm_bind_op **bind_ops)
2821 {
2822 int err;
2823 int i;
2824
2825 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2826 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2827 return -EINVAL;
2828
2829 if (XE_IOCTL_DBG(xe, args->extensions))
2830 return -EINVAL;
2831
2832 if (args->num_binds > 1) {
2833 u64 __user *bind_user =
2834 u64_to_user_ptr(args->vector_of_binds);
2835
2836 *bind_ops = kvmalloc_array(args->num_binds,
2837 sizeof(struct drm_xe_vm_bind_op),
2838 GFP_KERNEL | __GFP_ACCOUNT);
2839 if (!*bind_ops)
2840 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
2841
2842 err = __copy_from_user(*bind_ops, bind_user,
2843 sizeof(struct drm_xe_vm_bind_op) *
2844 args->num_binds);
2845 if (XE_IOCTL_DBG(xe, err)) {
2846 err = -EFAULT;
2847 goto free_bind_ops;
2848 }
2849 } else {
2850 *bind_ops = &args->bind;
2851 }
2852
2853 for (i = 0; i < args->num_binds; ++i) {
2854 u64 range = (*bind_ops)[i].range;
2855 u64 addr = (*bind_ops)[i].addr;
2856 u32 op = (*bind_ops)[i].op;
2857 u32 flags = (*bind_ops)[i].flags;
2858 u32 obj = (*bind_ops)[i].obj;
2859 u64 obj_offset = (*bind_ops)[i].obj_offset;
2860 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2861 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2862 u16 pat_index = (*bind_ops)[i].pat_index;
2863 u16 coh_mode;
2864
2865 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2866 err = -EINVAL;
2867 goto free_bind_ops;
2868 }
2869
2870 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2871 (*bind_ops)[i].pat_index = pat_index;
2872 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2873 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2874 err = -EINVAL;
2875 goto free_bind_ops;
2876 }
2877
2878 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2879 err = -EINVAL;
2880 goto free_bind_ops;
2881 }
2882
2883 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2884 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2885 XE_IOCTL_DBG(xe, obj && is_null) ||
2886 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2887 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2888 is_null) ||
2889 XE_IOCTL_DBG(xe, !obj &&
2890 op == DRM_XE_VM_BIND_OP_MAP &&
2891 !is_null) ||
2892 XE_IOCTL_DBG(xe, !obj &&
2893 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2894 XE_IOCTL_DBG(xe, addr &&
2895 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2896 XE_IOCTL_DBG(xe, range &&
2897 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2898 XE_IOCTL_DBG(xe, obj &&
2899 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2900 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2901 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2902 XE_IOCTL_DBG(xe, obj &&
2903 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2904 XE_IOCTL_DBG(xe, prefetch_region &&
2905 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2906 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2907 xe->info.mem_region_mask)) ||
2908 XE_IOCTL_DBG(xe, obj &&
2909 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2910 err = -EINVAL;
2911 goto free_bind_ops;
2912 }
2913
2914 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2915 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2916 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2917 XE_IOCTL_DBG(xe, !range &&
2918 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2919 err = -EINVAL;
2920 goto free_bind_ops;
2921 }
2922 }
2923
2924 return 0;
2925
2926 free_bind_ops:
2927 if (args->num_binds > 1)
2928 kvfree(*bind_ops);
2929 return err;
2930 }
2931
2932 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2933 struct xe_exec_queue *q,
2934 struct xe_sync_entry *syncs,
2935 int num_syncs)
2936 {
2937 struct dma_fence *fence;
2938 int i, err = 0;
2939
2940 fence = xe_sync_in_fence_get(syncs, num_syncs,
2941 to_wait_exec_queue(vm, q), vm);
2942 if (IS_ERR(fence))
2943 return PTR_ERR(fence);
2944
2945 for (i = 0; i < num_syncs; i++)
2946 xe_sync_entry_signal(&syncs[i], fence);
2947
2948 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2949 fence);
2950 dma_fence_put(fence);
2951
2952 return err;
2953 }
2954
2955 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
2956 struct xe_exec_queue *q,
2957 struct xe_sync_entry *syncs, u32 num_syncs)
2958 {
2959 memset(vops, 0, sizeof(*vops));
2960 INIT_LIST_HEAD(&vops->list);
2961 vops->vm = vm;
2962 vops->q = q;
2963 vops->syncs = syncs;
2964 vops->num_syncs = num_syncs;
2965 }
2966
2967 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2968 u64 addr, u64 range, u64 obj_offset,
2969 u16 pat_index)
2970 {
2971 u16 coh_mode;
2972
2973 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2974 XE_IOCTL_DBG(xe, obj_offset >
2975 bo->size - range)) {
2976 return -EINVAL;
2977 }
2978
2979 /*
2980 * Some platforms require 64k VM_BIND alignment,
2981 * specifically those with XE_VRAM_FLAGS_NEED64K.
2982 *
2983 	 * Other platforms may have BOs set to 64k physical placement,
2984 	 * but can still be mapped at 4k offsets. This check only applies
2985 	 * to the former case.
2986 */
2987 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2988 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2989 if (XE_IOCTL_DBG(xe, obj_offset &
2990 XE_64K_PAGE_MASK) ||
2991 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2992 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2993 return -EINVAL;
2994 }
2995 }
2996
2997 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2998 if (bo->cpu_caching) {
2999 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3000 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3001 return -EINVAL;
3002 }
3003 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3004 /*
3005 		 * An imported dma-buf from a different device should
3006 		 * require 1-way or 2-way coherency since we don't know
3007 		 * how it was mapped on the CPU. Just assume it is
3008 		 * potentially cached on the CPU side.
3009 */
3010 return -EINVAL;
3011 }
3012
3013 return 0;
3014 }
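
/*
 * Hedged example of the 64k rule enforced above, assuming a platform with
 * XE_VRAM_FLAGS_NEED64K and a BO created with XE_BO_FLAG_INTERNAL_64K:
 * addr = 0x200000, range = 0x10000, obj_offset = 0 passes, whereas
 * addr = 0x201000 is rejected with -EINVAL because it has bits set in
 * XE_64K_PAGE_MASK (0xffff), i.e. it is only 4k aligned.
 */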
3015
3016 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3017 {
3018 struct xe_device *xe = to_xe_device(dev);
3019 struct xe_file *xef = to_xe_file(file);
3020 struct drm_xe_vm_bind *args = data;
3021 struct drm_xe_sync __user *syncs_user;
3022 struct xe_bo **bos = NULL;
3023 struct drm_gpuva_ops **ops = NULL;
3024 struct xe_vm *vm;
3025 struct xe_exec_queue *q = NULL;
3026 u32 num_syncs, num_ufence = 0;
3027 struct xe_sync_entry *syncs = NULL;
3028 struct drm_xe_vm_bind_op *bind_ops;
3029 struct xe_vma_ops vops;
3030 int err;
3031 int i;
3032
3033 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
3034 if (err)
3035 return err;
3036
3037 if (args->exec_queue_id) {
3038 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3039 if (XE_IOCTL_DBG(xe, !q)) {
3040 err = -ENOENT;
3041 goto free_objs;
3042 }
3043
3044 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3045 err = -EINVAL;
3046 goto put_exec_queue;
3047 }
3048 }
3049
3050 vm = xe_vm_lookup(xef, args->vm_id);
3051 if (XE_IOCTL_DBG(xe, !vm)) {
3052 err = -EINVAL;
3053 goto put_exec_queue;
3054 }
3055
3056 err = down_write_killable(&vm->lock);
3057 if (err)
3058 goto put_vm;
3059
3060 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3061 err = -ENOENT;
3062 goto release_vm_lock;
3063 }
3064
3065 for (i = 0; i < args->num_binds; ++i) {
3066 u64 range = bind_ops[i].range;
3067 u64 addr = bind_ops[i].addr;
3068
3069 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3070 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3071 err = -EINVAL;
3072 goto release_vm_lock;
3073 }
3074 }
3075
3076 if (args->num_binds) {
3077 bos = kvcalloc(args->num_binds, sizeof(*bos),
3078 GFP_KERNEL | __GFP_ACCOUNT);
3079 if (!bos) {
3080 err = -ENOMEM;
3081 goto release_vm_lock;
3082 }
3083
3084 ops = kvcalloc(args->num_binds, sizeof(*ops),
3085 GFP_KERNEL | __GFP_ACCOUNT);
3086 if (!ops) {
3087 err = -ENOMEM;
3088 goto release_vm_lock;
3089 }
3090 }
3091
3092 for (i = 0; i < args->num_binds; ++i) {
3093 struct drm_gem_object *gem_obj;
3094 u64 range = bind_ops[i].range;
3095 u64 addr = bind_ops[i].addr;
3096 u32 obj = bind_ops[i].obj;
3097 u64 obj_offset = bind_ops[i].obj_offset;
3098 u16 pat_index = bind_ops[i].pat_index;
3099
3100 if (!obj)
3101 continue;
3102
3103 gem_obj = drm_gem_object_lookup(file, obj);
3104 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3105 err = -ENOENT;
3106 goto put_obj;
3107 }
3108 bos[i] = gem_to_xe_bo(gem_obj);
3109
3110 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3111 obj_offset, pat_index);
3112 if (err)
3113 goto put_obj;
3114 }
3115
3116 if (args->num_syncs) {
3117 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3118 if (!syncs) {
3119 err = -ENOMEM;
3120 goto put_obj;
3121 }
3122 }
3123
3124 syncs_user = u64_to_user_ptr(args->syncs);
3125 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3126 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3127 &syncs_user[num_syncs],
3128 (xe_vm_in_lr_mode(vm) ?
3129 SYNC_PARSE_FLAG_LR_MODE : 0) |
3130 (!args->num_binds ?
3131 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3132 if (err)
3133 goto free_syncs;
3134
3135 if (xe_sync_is_ufence(&syncs[num_syncs]))
3136 num_ufence++;
3137 }
3138
3139 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3140 err = -EINVAL;
3141 goto free_syncs;
3142 }
3143
3144 if (!args->num_binds) {
3145 err = -ENODATA;
3146 goto free_syncs;
3147 }
3148
3149 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3150 for (i = 0; i < args->num_binds; ++i) {
3151 u64 range = bind_ops[i].range;
3152 u64 addr = bind_ops[i].addr;
3153 u32 op = bind_ops[i].op;
3154 u32 flags = bind_ops[i].flags;
3155 u64 obj_offset = bind_ops[i].obj_offset;
3156 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3157 u16 pat_index = bind_ops[i].pat_index;
3158
3159 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3160 addr, range, op, flags,
3161 prefetch_region, pat_index);
3162 if (IS_ERR(ops[i])) {
3163 err = PTR_ERR(ops[i]);
3164 ops[i] = NULL;
3165 goto unwind_ops;
3166 }
3167
3168 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3169 if (err)
3170 goto unwind_ops;
3171
3172 #ifdef TEST_VM_OPS_ERROR
3173 if (flags & FORCE_OP_ERROR) {
3174 vops.inject_error = true;
3175 vm->xe->vm_inject_error_position =
3176 (vm->xe->vm_inject_error_position + 1) %
3177 FORCE_OP_ERROR_COUNT;
3178 }
3179 #endif
3180 }
3181
3182 /* Nothing to do */
3183 if (list_empty(&vops.list)) {
3184 err = -ENODATA;
3185 goto unwind_ops;
3186 }
3187
3188 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3189 if (err)
3190 goto unwind_ops;
3191
3192 err = vm_bind_ioctl_ops_execute(vm, &vops);
3193
3194 unwind_ops:
3195 if (err && err != -ENODATA)
3196 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3197 xe_vma_ops_fini(&vops);
3198 for (i = args->num_binds - 1; i >= 0; --i)
3199 if (ops[i])
3200 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3201 free_syncs:
3202 if (err == -ENODATA)
3203 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3204 while (num_syncs--)
3205 xe_sync_entry_cleanup(&syncs[num_syncs]);
3206
3207 kfree(syncs);
3208 put_obj:
3209 for (i = 0; i < args->num_binds; ++i)
3210 xe_bo_put(bos[i]);
3211 release_vm_lock:
3212 up_write(&vm->lock);
3213 put_vm:
3214 xe_vm_put(vm);
3215 put_exec_queue:
3216 if (q)
3217 xe_exec_queue_put(q);
3218 free_objs:
3219 kvfree(bos);
3220 kvfree(ops);
3221 if (args->num_binds > 1)
3222 kvfree(bind_ops);
3223 return err;
3224 }
3225
3226 /**
3227 * xe_vm_lock() - Lock the vm's dma_resv object
3228 * @vm: The struct xe_vm whose lock is to be locked
3229  * @intr: Whether to perform the wait interruptibly
3230 *
3231 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3232 * contended lock was interrupted. If @intr is false, the function
3233 * always returns 0.
3234 */
3235 int xe_vm_lock(struct xe_vm *vm, bool intr)
3236 {
3237 if (intr)
3238 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3239
3240 return dma_resv_lock(xe_vm_resv(vm), NULL);
3241 }
3242
3243 /**
3244 * xe_vm_unlock() - Unlock the vm's dma_resv object
3245 * @vm: The struct xe_vm whose lock is to be released.
3246 *
3247  * Unlock the vm's dma_resv object that was previously locked by xe_vm_lock().
3248 */
3249 void xe_vm_unlock(struct xe_vm *vm)
3250 {
3251 dma_resv_unlock(xe_vm_resv(vm));
3252 }
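
/*
 * Minimal usage sketch for the two helpers above; the surrounding code is
 * hypothetical, only xe_vm_lock()/xe_vm_unlock() are real:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;	(-EINTR if the interruptible wait was hit)
 *	... operate on state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */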
3253
3254 /**
3255 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3256 * @vma: VMA to invalidate
3257 *
3258  * Walks the list of page-table leaves, zeroing the entries owned by this
3259  * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3260  * complete.
3261  *
3262  * Return: 0 on success, negative error code otherwise.
3263 */
3264 int xe_vm_invalidate_vma(struct xe_vma *vma)
3265 {
3266 struct xe_device *xe = xe_vma_vm(vma)->xe;
3267 struct xe_tile *tile;
3268 struct xe_gt_tlb_invalidation_fence
3269 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3270 u8 id;
3271 u32 fence_id = 0;
3272 int ret = 0;
3273
3274 xe_assert(xe, !xe_vma_is_null(vma));
3275 trace_xe_vma_invalidate(vma);
3276
3277 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3278 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3279 xe_vma_start(vma), xe_vma_size(vma));
3280
3281 /* Check that we don't race with page-table updates */
3282 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3283 if (xe_vma_is_userptr(vma)) {
3284 WARN_ON_ONCE(!mmu_interval_check_retry
3285 (&to_userptr_vma(vma)->userptr.notifier,
3286 to_userptr_vma(vma)->userptr.notifier_seq));
3287 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3288 DMA_RESV_USAGE_BOOKKEEP));
3289
3290 } else {
3291 xe_bo_assert_held(xe_vma_bo(vma));
3292 }
3293 }
3294
3295 for_each_tile(tile, xe, id) {
3296 if (xe_pt_zap_ptes(tile, vma)) {
3297 xe_device_wmb(xe);
3298 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3299 &fence[fence_id],
3300 true);
3301
3302 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3303 &fence[fence_id], vma);
3304 if (ret)
3305 goto wait;
3306 ++fence_id;
3307
3308 if (!tile->media_gt)
3309 continue;
3310
3311 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3312 &fence[fence_id],
3313 true);
3314
3315 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3316 &fence[fence_id], vma);
3317 if (ret)
3318 goto wait;
3319 ++fence_id;
3320 }
3321 }
3322
3323 wait:
3324 for (id = 0; id < fence_id; ++id)
3325 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3326
3327 vma->tile_invalidated = vma->tile_mask;
3328
3329 return ret;
3330 }
3331
3332 struct xe_vm_snapshot {
3333 unsigned long num_snaps;
3334 struct {
3335 u64 ofs, bo_ofs;
3336 unsigned long len;
3337 struct xe_bo *bo;
3338 void *data;
3339 struct mm_struct *mm;
3340 } snap[];
3341 };
3342
3343 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3344 {
3345 unsigned long num_snaps = 0, i;
3346 struct xe_vm_snapshot *snap = NULL;
3347 struct drm_gpuva *gpuva;
3348
3349 if (!vm)
3350 return NULL;
3351
3352 mutex_lock(&vm->snap_mutex);
3353 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3354 if (gpuva->flags & XE_VMA_DUMPABLE)
3355 num_snaps++;
3356 }
3357
3358 if (num_snaps)
3359 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3360 if (!snap) {
3361 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3362 goto out_unlock;
3363 }
3364
3365 snap->num_snaps = num_snaps;
3366 i = 0;
3367 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3368 struct xe_vma *vma = gpuva_to_vma(gpuva);
3369 struct xe_bo *bo = vma->gpuva.gem.obj ?
3370 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3371
3372 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3373 continue;
3374
3375 snap->snap[i].ofs = xe_vma_start(vma);
3376 snap->snap[i].len = xe_vma_size(vma);
3377 if (bo) {
3378 snap->snap[i].bo = xe_bo_get(bo);
3379 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3380 } else if (xe_vma_is_userptr(vma)) {
3381 struct mm_struct *mm =
3382 to_userptr_vma(vma)->userptr.notifier.mm;
3383
3384 if (mmget_not_zero(mm))
3385 snap->snap[i].mm = mm;
3386 else
3387 snap->snap[i].data = ERR_PTR(-EFAULT);
3388
3389 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3390 } else {
3391 snap->snap[i].data = ERR_PTR(-ENOENT);
3392 }
3393 i++;
3394 }
3395
3396 out_unlock:
3397 mutex_unlock(&vm->snap_mutex);
3398 return snap;
3399 }
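
/*
 * Hedged sketch of the expected snapshot lifecycle (the caller shown is
 * illustrative; in the driver this is used from e.g. the device coredump
 * path):
 *
 *	snap = xe_vm_snapshot_capture(vm);        capture under GFP_NOWAIT
 *	xe_vm_snapshot_capture_delayed(snap);     copy contents, may sleep
 *	xe_vm_snapshot_print(snap, &p);           ascii85-encode into printer
 *	xe_vm_snapshot_free(snap);                drop BO refs and free
 */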
3400
3401 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3402 {
3403 if (IS_ERR_OR_NULL(snap))
3404 return;
3405
3406 for (int i = 0; i < snap->num_snaps; i++) {
3407 struct xe_bo *bo = snap->snap[i].bo;
3408 struct iosys_map src;
3409 int err;
3410
3411 if (IS_ERR(snap->snap[i].data))
3412 continue;
3413
3414 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3415 if (!snap->snap[i].data) {
3416 snap->snap[i].data = ERR_PTR(-ENOMEM);
3417 goto cleanup_bo;
3418 }
3419
3420 if (bo) {
3421 xe_bo_lock(bo, false);
3422 err = ttm_bo_vmap(&bo->ttm, &src);
3423 if (!err) {
3424 xe_map_memcpy_from(xe_bo_device(bo),
3425 snap->snap[i].data,
3426 &src, snap->snap[i].bo_ofs,
3427 snap->snap[i].len);
3428 ttm_bo_vunmap(&bo->ttm, &src);
3429 }
3430 xe_bo_unlock(bo);
3431 } else {
3432 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3433
3434 kthread_use_mm(snap->snap[i].mm);
3435 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3436 err = 0;
3437 else
3438 err = -EFAULT;
3439 kthread_unuse_mm(snap->snap[i].mm);
3440
3441 mmput(snap->snap[i].mm);
3442 snap->snap[i].mm = NULL;
3443 }
3444
3445 if (err) {
3446 kvfree(snap->snap[i].data);
3447 snap->snap[i].data = ERR_PTR(err);
3448 }
3449
3450 cleanup_bo:
3451 xe_bo_put(bo);
3452 snap->snap[i].bo = NULL;
3453 }
3454 }
3455
3456 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3457 {
3458 unsigned long i, j;
3459
3460 if (IS_ERR_OR_NULL(snap)) {
3461 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3462 return;
3463 }
3464
3465 for (i = 0; i < snap->num_snaps; i++) {
3466 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3467
3468 if (IS_ERR(snap->snap[i].data)) {
3469 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3470 PTR_ERR(snap->snap[i].data));
3471 continue;
3472 }
3473
3474 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3475
3476 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3477 u32 *val = snap->snap[i].data + j;
3478 char dumped[ASCII85_BUFSZ];
3479
3480 drm_puts(p, ascii85_encode(*val, dumped));
3481 }
3482
3483 drm_puts(p, "\n");
3484 }
3485 }
3486
3487 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3488 {
3489 unsigned long i;
3490
3491 if (IS_ERR_OR_NULL(snap))
3492 return;
3493
3494 for (i = 0; i < snap->num_snaps; i++) {
3495 if (!IS_ERR(snap->snap[i].data))
3496 kvfree(snap->snap[i].data);
3497 xe_bo_put(snap->snap[i].bo);
3498 if (snap->snap[i].mm)
3499 mmput(snap->snap[i].mm);
3500 }
3501 kvfree(snap);
3502 }
3503