/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

struct i915_mm_struct {
        struct mm_struct *mm;
        struct drm_i915_private *i915;
        struct i915_mmu_notifier *mn;
        struct hlist_node node;
        struct kref kref;
        struct rcu_work work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
        spinlock_t lock;
        struct hlist_node node;
        struct mmu_notifier mn;
        struct rb_root_cached objects;
        struct i915_mm_struct *mm;
};

struct i915_mmu_object {
        struct i915_mmu_notifier *mn;
        struct drm_i915_gem_object *obj;
        struct interval_tree_node it;
};

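/*
 * add_object()/del_object() maintain the interval tree of active userptr
 * objects so that invalidate_range_start() can look up which objects overlap
 * a given address range. Both are called with mn->lock held.
 */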
static void add_object(struct i915_mmu_object *mo)
{
        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
        interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
        if (RB_EMPTY_NODE(&mo->it.rb))
                return;

        interval_tree_remove(&mo->it, &mo->mn->objects);
        RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
        struct i915_mmu_object *mo = obj->userptr.mmu_object;

        /*
         * During mm_invalidate_range we need to cancel any userptr that
         * overlaps the range being invalidated. Doing so requires the
         * struct_mutex, and that risks recursion. In order to cause
         * recursion, the user must alias the userptr address space with
         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
         * to invalidate that mmapping, mm_invalidate_range is called with
         * the userptr address *and* the struct_mutex held. To prevent that
         * we set a flag under the i915_mmu_notifier spinlock to indicate
         * whether this object is valid.
         */
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        if (value)
                add_object(mo);
        else
                del_object(mo);
        spin_unlock(&mo->mn->lock);
}

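/*
 * Core of the mmu_notifier: on any invalidation of the address range, unbind
 * every userptr object that overlaps it and release its pages, forcing the
 * next use of the object to re-acquire them.
 */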
static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
                                  const struct mmu_notifier_range *range)
{
        struct i915_mmu_notifier *mn =
                container_of(_mn, struct i915_mmu_notifier, mn);
        struct interval_tree_node *it;
        unsigned long end;
        int ret = 0;

        if (RB_EMPTY_ROOT(&mn->objects.rb_root))
                return 0;

        /* interval ranges are inclusive, but invalidate range is exclusive */
        end = range->end - 1;

        spin_lock(&mn->lock);
        it = interval_tree_iter_first(&mn->objects, range->start, end);
        while (it) {
                struct drm_i915_gem_object *obj;

                if (!mmu_notifier_range_blockable(range)) {
                        ret = -EAGAIN;
                        break;
                }

                /*
                 * The mmu_object is released late when destroying the
                 * GEM object so it is entirely possible to gain a
                 * reference on an object in the process of being freed
                 * since our serialisation is via the spinlock and not
                 * the struct_mutex - and consequently use it after it
                 * is freed and then double free it. To prevent that
                 * use-after-free we only acquire a reference on the
                 * object if it is not in the process of being destroyed.
                 */
                obj = container_of(it, struct i915_mmu_object, it)->obj;
                if (!kref_get_unless_zero(&obj->base.refcount)) {
                        it = interval_tree_iter_next(it, range->start, end);
                        continue;
                }
                spin_unlock(&mn->lock);

                ret = i915_gem_object_unbind(obj,
                                             I915_GEM_OBJECT_UNBIND_ACTIVE |
                                             I915_GEM_OBJECT_UNBIND_BARRIER);
                if (ret == 0)
                        ret = __i915_gem_object_put_pages(obj);
                i915_gem_object_put(obj);
                if (ret)
                        return ret;

                spin_lock(&mn->lock);

                /*
                 * As we do not (yet) protect the mmu from concurrent insertion
                 * over this range, there is no guarantee that this search will
                 * terminate given a pathological workload.
                 */
                it = interval_tree_iter_first(&mn->objects, range->start, end);
        }
        spin_unlock(&mn->lock);

        return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
        .invalidate_range_start = userptr_mn_invalidate_range_start,
};

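/*
 * Allocate a notifier for @mm. Registration with the core MM and publishing
 * via mm->mn are handled by i915_mmu_notifier_find() below.
 */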
static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn;

        mn = kmalloc(sizeof(*mn), GFP_KERNEL);
        if (mn == NULL)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&mn->lock);
        mn->mn.ops = &i915_gem_userptr_notifier;
        mn->objects = RB_ROOT_CACHED;
        mn->mm = mm;

        return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
        struct i915_mmu_object *mo;

        mo = fetch_and_zero(&obj->userptr.mmu_object);
        if (!mo)
                return;

        spin_lock(&mo->mn->lock);
        del_object(mo);
        spin_unlock(&mo->mn->lock);
        kfree(mo);
}

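/*
 * Look up, or lazily create and register, the notifier for this mm. The
 * cmpxchg() resolves the race between two threads creating one concurrently:
 * the loser unregisters and frees its copy and returns the winner's.
 */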
static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
        struct i915_mmu_notifier *mn, *old;
        int err;

        mn = READ_ONCE(mm->mn);
        if (likely(mn))
                return mn;

        mn = i915_mmu_notifier_create(mm);
        if (IS_ERR(mn))
                return mn;

        err = mmu_notifier_register(&mn->mn, mm->mm);
        if (err) {
                kfree(mn);
                return ERR_PTR(err);
        }

        old = cmpxchg(&mm->mn, NULL, mn);
        if (old) {
                mmu_notifier_unregister(&mn->mn, mm->mm);
                kfree(mn);
                mn = old;
        }

        return mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        struct i915_mmu_notifier *mn;
        struct i915_mmu_object *mo;

        if (flags & I915_USERPTR_UNSYNCHRONIZED)
                return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        if (GEM_WARN_ON(!obj->userptr.mm))
                return -EINVAL;

        mn = i915_mmu_notifier_find(obj->userptr.mm);
        if (IS_ERR(mn))
                return PTR_ERR(mn);

        mo = kzalloc(sizeof(*mo), GFP_KERNEL);
        if (!mo)
                return -ENOMEM;

        mo->mn = mn;
        mo->obj = obj;
        mo->it.start = obj->userptr.ptr;
        mo->it.last = obj->userptr.ptr + obj->base.size - 1;
        RB_CLEAR_NODE(&mo->it.rb);

        obj->userptr.mmu_object = mo;
        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
        if (mn == NULL)
                return;

        mmu_notifier_unregister(&mn->mn, mm);
        kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
                                    unsigned flags)
{
        if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
                return -ENODEV;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
                       struct mm_struct *mm)
{
}

#endif

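/*
 * Find an existing i915_mm_struct for @real under RCU, taking a reference
 * only if the entry is still live (its kref has not already dropped to zero).
 */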
static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real)
{
        struct i915_mm_struct *it, *mm = NULL;

        rcu_read_lock();
        hash_for_each_possible_rcu(i915->mm_structs,
                                   it, node,
                                   (unsigned long)real)
                if (it->mm == real && kref_get_unless_zero(&it->kref)) {
                        mm = it;
                        break;
                }
        rcu_read_unlock();

        return mm;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_mm_struct *mm, *new;
        int ret = 0;

        /* During release of the GEM object we hold the struct_mutex. This
         * precludes us from calling mmput() at that time as that may be
         * the last reference and so call exit_mmap(). exit_mmap() will
         * attempt to reap the vma, and if we were holding a GTT mmap
         * would then call drm_gem_vm_close() and attempt to reacquire
         * the struct mutex. So in order to avoid that recursion, we have
         * to defer releasing the mm reference until after we drop the
         * struct_mutex, i.e. we need to schedule a worker to do the clean
         * up.
         */
        mm = __i915_mm_struct_find(i915, current->mm);
        if (mm)
                goto out;

        new = kmalloc(sizeof(*mm), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        kref_init(&new->kref);
        new->i915 = to_i915(obj->base.dev);
        new->mm = current->mm;
        new->mn = NULL;

        spin_lock(&i915->mm_lock);
        mm = __i915_mm_struct_find(i915, current->mm);
        if (!mm) {
                hash_add_rcu(i915->mm_structs,
                             &new->node,
                             (unsigned long)new->mm);
                mmgrab(current->mm);
                mm = new;
        }
        spin_unlock(&i915->mm_lock);
        if (mm != new)
                kfree(new);

out:
        obj->userptr.mm = mm;
        return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
        struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work);

        i915_mmu_notifier_free(mm->mn, mm->mm);
        mmdrop(mm->mm);
        kfree(mm);
}

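/*
 * Last reference dropped: unhook the i915_mm_struct from the hash and defer
 * the mmu_notifier teardown and mmdrop() to a worker after an RCU grace
 * period, so concurrent RCU lookups never see a freed entry.
 */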
static void
__i915_mm_struct_free(struct kref *kref)
{
        struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

        spin_lock(&mm->i915->mm_lock);
        hash_del_rcu(&mm->node);
        spin_unlock(&mm->i915->mm_lock);

        INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker);
        queue_rcu_work(system_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mm == NULL)
                return;

        kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free);
        obj->userptr.mm = NULL;
}

struct get_pages_work {
        struct work_struct work;
        struct drm_i915_gem_object *obj;
        struct task_struct *task;
};

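/*
 * Build an sg_table for the pinned user pages and map it for GPU access. If
 * mapping fails with large coalesced segments, retry once with PAGE_SIZE
 * segments before giving up.
 */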
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
                               struct page **pvec, unsigned long num_pages)
{
        unsigned int max_segment = i915_sg_segment_size();
        struct sg_table *st;
        unsigned int sg_page_sizes;
        struct scatterlist *sg;
        int ret;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                return ERR_PTR(-ENOMEM);

alloc_table:
        sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
                                         num_pages << PAGE_SHIFT, max_segment,
                                         NULL, 0, GFP_KERNEL);
        if (IS_ERR(sg)) {
                kfree(st);
                return ERR_CAST(sg);
        }

        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
                sg_free_table(st);

                if (max_segment > PAGE_SIZE) {
                        max_segment = PAGE_SIZE;
                        goto alloc_table;
                }

                kfree(st);
                return ERR_PTR(ret);
        }

        sg_page_sizes = i915_sg_page_sizes(st->sgl);

        __i915_gem_object_set_pages(obj, st, sg_page_sizes);

        return st;
}

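/*
 * Worker context: pin the user pages with pin_user_pages_remote() under
 * mmap_read_lock(), then hand them to the object if the work has not been
 * cancelled or superseded in the meantime (obj->userptr.work still points at
 * us).
 */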
static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
        struct get_pages_work *work = container_of(_work, typeof(*work), work);
        struct drm_i915_gem_object *obj = work->obj;
        const unsigned long npages = obj->base.size >> PAGE_SHIFT;
        unsigned long pinned;
        struct page **pvec;
        int ret;

        ret = -ENOMEM;
        pinned = 0;

        pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
                unsigned int flags = 0;
                int locked = 0;

                if (!i915_gem_object_is_readonly(obj))
                        flags |= FOLL_WRITE;

                ret = -EFAULT;
                if (mmget_not_zero(mm)) {
                        while (pinned < npages) {
                                if (!locked) {
                                        mmap_read_lock(mm);
                                        locked = 1;
                                }
                                ret = pin_user_pages_remote
                                        (mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
                                         flags,
                                         pvec + pinned, NULL, &locked);
                                if (ret < 0)
                                        break;

                                pinned += ret;
                        }
                        if (locked)
                                mmap_read_unlock(mm);
                        mmput(mm);
                }
        }

        mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
        if (obj->userptr.work == &work->work) {
                struct sg_table *pages = ERR_PTR(ret);

                if (pinned == npages) {
                        pages = __i915_gem_userptr_alloc_pages(obj, pvec,
                                                               npages);
                        if (!IS_ERR(pages)) {
                                pinned = 0;
                                pages = NULL;
                        }
                }

                obj->userptr.work = ERR_CAST(pages);
                if (IS_ERR(pages))
                        __i915_gem_userptr_set_active(obj, false);
        }
        mutex_unlock(&obj->mm.lock);

        unpin_user_pages(pvec, pinned);
        kvfree(pvec);

        i915_gem_object_put(obj);
        put_task_struct(work->task);
        kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
        struct get_pages_work *work;

        /* Spawn a worker so that we can acquire the
         * user pages without holding our mutex. Access
         * to the user pages requires mmap_lock, and we have
         * a strict lock ordering of mmap_lock, struct_mutex -
         * we already hold struct_mutex here and so cannot
         * call gup without encountering a lock inversion.
         *
         * Userspace will keep on repeating the operation
         * (thanks to EAGAIN) until either we hit the fast
         * path or the worker completes. If the worker is
         * cancelled or superseded, the task is still run
         * but the results ignored. (This leads to
         * complications that we may have a stray object
         * refcount that we need to be wary of when
         * checking for existing objects during creation.)
         * If the worker encounters an error, it reports
         * that error back to this function through
         * obj->userptr.work = ERR_PTR.
         */
        work = kmalloc(sizeof(*work), GFP_KERNEL);
        if (work == NULL)
                return ERR_PTR(-ENOMEM);

        obj->userptr.work = &work->work;

        work->obj = i915_gem_object_get(obj);

        work->task = current;
        get_task_struct(work->task);

        INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
        queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

        return ERR_PTR(-EAGAIN);
}

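/*
 * get_pages hook: try a fast, non-blocking pin of the pages when called from
 * the owning process; otherwise, or on partial success, fall back to the
 * worker above and return -EAGAIN so the caller retries.
 */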
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
        const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
        struct mm_struct *mm = obj->userptr.mm->mm;
        struct page **pvec;
        struct sg_table *pages;
        bool active;
        int pinned;
        unsigned int gup_flags = 0;

        /* If userspace should engineer that these pages are replaced in
         * the vma between us binding this page into the GTT and completion
         * of rendering... Their loss. If they change the mapping of their
         * pages they need to create a new bo to point to the new vma.
         *
         * However, that still leaves open the possibility of the vma
         * being copied upon fork. Which falls under the same userspace
         * synchronisation issue as a regular bo, except that this time
         * the process may not be expecting that a particular piece of
         * memory is tied to the GPU.
         *
         * Fortunately, we can hook into the mmu_notifier in order to
         * discard the page references prior to anything nasty happening
         * to the vma (discard or cloning) which should prevent the more
         * egregious cases from causing harm.
         */

        if (obj->userptr.work) {
                /* active flag should still be held for the pending work */
                if (IS_ERR(obj->userptr.work))
                        return PTR_ERR(obj->userptr.work);
                else
                        return -EAGAIN;
        }

        pvec = NULL;
        pinned = 0;

        if (mm == current->mm) {
                pvec = kvmalloc_array(num_pages, sizeof(struct page *),
                                      GFP_KERNEL |
                                      __GFP_NORETRY |
                                      __GFP_NOWARN);
                if (pvec) {
                        /* defer to worker if malloc fails */
                        if (!i915_gem_object_is_readonly(obj))
                                gup_flags |= FOLL_WRITE;
                        pinned = pin_user_pages_fast_only(obj->userptr.ptr,
                                                          num_pages, gup_flags,
                                                          pvec);
                }
        }

        active = false;
        if (pinned < 0) {
                pages = ERR_PTR(pinned);
                pinned = 0;
        } else if (pinned < num_pages) {
                pages = __i915_gem_userptr_get_pages_schedule(obj);
                active = pages == ERR_PTR(-EAGAIN);
        } else {
                pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
                active = !IS_ERR(pages);
        }
        if (active)
                __i915_gem_userptr_set_active(obj, true);

        if (IS_ERR(pages))
                unpin_user_pages(pvec, pinned);
        kvfree(pvec);

        return PTR_ERR_OR_ZERO(pages);
}

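/*
 * put_pages hook: cancel any in-flight worker, drop the active interval-tree
 * entry, propagate dirty state back to the pages and finally unpin them.
 */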
static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
                           struct sg_table *pages)
{
        struct sgt_iter sgt_iter;
        struct page *page;

        /* Cancel any inflight work and force them to restart their gup */
        obj->userptr.work = NULL;
        __i915_gem_userptr_set_active(obj, false);
        if (!pages)
                return;

        __i915_gem_object_release_shmem(obj, pages, true);
        i915_gem_gtt_finish_pages(obj, pages);

        /*
         * We always mark objects as dirty when they are used by the GPU,
         * just in case. However, if we set the vma as being read-only we know
         * that the object will never have been written to.
         */
        if (i915_gem_object_is_readonly(obj))
                obj->mm.dirty = false;

        for_each_sgt_page(page, sgt_iter, pages) {
                if (obj->mm.dirty && trylock_page(page)) {
                        /*
                         * As this may not be anonymous memory (e.g. shmem)
                         * but exist on a real mapping, we have to lock
                         * the page in order to dirty it -- holding
                         * the page reference is not sufficient to
                         * prevent the inode from being truncated.
                         * Play safe and take the lock.
                         *
                         * However...!
                         *
                         * The mmu-notifier can be invalidated for a
                         * migrate_page, that is already holding the lock
                         * on the page. Such a try_to_unmap() will result
                         * in us calling put_pages() and so recursively try
                         * to lock the page. We avoid that deadlock with
                         * a trylock_page() and in exchange we risk missing
                         * some page dirtying.
                         */
                        set_page_dirty(page);
                        unlock_page(page);
                }

                mark_page_accessed(page);
                unpin_user_page(page);
        }
        obj->mm.dirty = false;

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
        i915_gem_userptr_release__mmu_notifier(obj);
        i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
        if (obj->userptr.mmu_object)
                return 0;

        return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
        .name = "i915_gem_object_userptr",
        .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
                 I915_GEM_OBJECT_IS_SHRINKABLE |
                 I915_GEM_OBJECT_NO_MMAP |
                 I915_GEM_OBJECT_ASYNC_CANCEL,
        .get_pages = i915_gem_userptr_get_pages,
        .put_pages = i915_gem_userptr_put_pages,
        .dmabuf_export = i915_gem_userptr_dmabuf_export,
        .release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note, that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
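/*
 * A minimal userspace sketch (illustrative only, not part of this file):
 * fill struct drm_i915_gem_userptr with a page-aligned pointer and size and
 * issue DRM_IOCTL_I915_GEM_USERPTR on the DRM fd to receive a GEM handle.
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,	// page-aligned allocation
 *		.user_size = size,		// multiple of the page size
 *		.flags = 0,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
 *		use_handle(arg.handle);		// use_handle() is hypothetical
 */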
int
i915_gem_userptr_ioctl(struct drm_device *dev,
                       void *data,
                       struct drm_file *file)
{
        static struct lock_class_key lock_class;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct drm_i915_gem_userptr *args = data;
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
                /* We cannot support coherent userptr objects on hw without
                 * LLC and with broken snooping.
                 */
                return -ENODEV;
        }

        if (args->flags & ~(I915_USERPTR_READ_ONLY |
                            I915_USERPTR_UNSYNCHRONIZED))
                return -EINVAL;

        /*
         * XXX: There is a prevalence of the assumption that we fit the
         * object's page count inside a 32bit _signed_ variable. Let's document
         * this and catch if we ever need to fix it. In the meantime, if you do
         * spot such a local variable, please consider fixing!
         *
         * Aside from our own locals (for which we have no excuse!):
         * - sg_table embeds unsigned int for num_pages
         * - get_user_pages*() mixed ints with longs
         */

        if (args->user_size >> PAGE_SHIFT > INT_MAX)
                return -E2BIG;

        if (overflows_type(args->user_size, obj->base.size))
                return -E2BIG;

        if (!args->user_size)
                return -EINVAL;

        if (offset_in_page(args->user_ptr | args->user_size))
                return -EINVAL;

        if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
                return -EFAULT;

        if (args->flags & I915_USERPTR_READ_ONLY) {
                /*
                 * On almost all of the older hw, we cannot tell the GPU that
                 * a page is readonly.
                 */
                if (!dev_priv->gt.vm->has_read_only)
                        return -ENODEV;
        }

        obj = i915_gem_object_alloc();
        if (obj == NULL)
                return -ENOMEM;

        drm_gem_private_object_init(dev, &obj->base, args->user_size);
        i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
        obj->read_domains = I915_GEM_DOMAIN_CPU;
        obj->write_domain = I915_GEM_DOMAIN_CPU;
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        obj->userptr.ptr = args->user_ptr;
        if (args->flags & I915_USERPTR_READ_ONLY)
                i915_gem_object_set_readonly(obj);

        /* And keep a pointer to the current->mm for resolving the user pages
         * at binding. This means that we need to hook into the mmu_notifier
         * in order to detect if the mmu is destroyed.
         */
        ret = i915_gem_userptr_init__mm_struct(obj);
        if (ret == 0)
                ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
        if (ret == 0)
                ret = drm_gem_handle_create(file, &obj->base, &handle);

        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        args->handle = handle;
        return 0;
}

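/*
 * One-time setup/teardown: the per-device mm hash plus the high-priority,
 * unbound workqueue used by the get_pages worker above.
 */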
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
        spin_lock_init(&dev_priv->mm_lock);
        hash_init(dev_priv->mm_structs);

        dev_priv->mm.userptr_wq =
                alloc_workqueue("i915-userptr-acquire",
                                WQ_HIGHPRI | WQ_UNBOUND,
                                0);
        if (!dev_priv->mm.userptr_wq)
                return -ENOMEM;

        return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
        destroy_workqueue(dev_priv->mm.userptr_wq);
}