1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28 
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35 
36 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
37 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
38 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
39 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
40 
41 #define BATCH_OFFSET_BIAS (256*1024)
42 
43 struct eb_vmas {
44 	struct list_head vmas;
45 	int and;
46 	union {
47 		struct i915_vma *lut[0];
48 		struct hlist_head buckets[0];
49 	};
50 };
51 
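/*
 * Allocate the handle->vma lookup structure for this execbuffer: a flat
 * array indexed by buffer position when userspace passes
 * I915_EXEC_HANDLE_LUT, otherwise (or if that allocation fails) a hash
 * table sized to roughly twice the buffer count. eb->and < 0 marks the
 * flat-array case; otherwise it holds the hash mask.
 */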
52 static struct eb_vmas *
53 eb_create(struct drm_i915_gem_execbuffer2 *args)
54 {
55 	struct eb_vmas *eb = NULL;
56 
57 	if (args->flags & I915_EXEC_HANDLE_LUT) {
58 		unsigned size = args->buffer_count;
59 		size *= sizeof(struct i915_vma *);
60 		size += sizeof(struct eb_vmas);
61 		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
62 	}
63 
64 	if (eb == NULL) {
65 		unsigned size = args->buffer_count;
66 		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
67 		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
68 		while (count > 2*size)
69 			count >>= 1;
70 		eb = kzalloc(count*sizeof(struct hlist_head) +
71 			     sizeof(struct eb_vmas),
72 			     GFP_TEMPORARY);
73 		if (eb == NULL)
74 			return eb;
75 
76 		eb->and = count - 1;
77 	} else
78 		eb->and = -args->buffer_count;
79 
80 	INIT_LIST_HEAD(&eb->vmas);
81 	return eb;
82 }
83 
84 static void
85 eb_reset(struct eb_vmas *eb)
86 {
87 	if (eb->and >= 0)
88 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
89 }
90 
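/*
 * Resolve the user's exec object handles into vmas: take a reference on
 * every GEM object under the handle lock (rejecting duplicates), then,
 * with the lock dropped, look up or create the vma for the target address
 * space and add it to the eb lookup structure.
 */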
91 static int
92 eb_lookup_vmas(struct eb_vmas *eb,
93 	       struct drm_i915_gem_exec_object2 *exec,
94 	       const struct drm_i915_gem_execbuffer2 *args,
95 	       struct i915_address_space *vm,
96 	       struct drm_file *file)
97 {
98 	struct drm_i915_gem_object *obj;
99 	struct list_head objects;
100 	int i, ret;
101 
102 	INIT_LIST_HEAD(&objects);
103 	spin_lock(&file->table_lock);
104 	/* Grab a reference to the object and release the lock so we can look up
105 	 * or create the VMA without using GFP_ATOMIC */
106 	for (i = 0; i < args->buffer_count; i++) {
107 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
108 		if (obj == NULL) {
109 			spin_unlock(&file->table_lock);
110 			DRM_DEBUG("Invalid object handle %d at index %d\n",
111 				   exec[i].handle, i);
112 			ret = -ENOENT;
113 			goto err;
114 		}
115 
116 		if (!list_empty(&obj->obj_exec_link)) {
117 			spin_unlock(&file->table_lock);
118 			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
119 				   obj, exec[i].handle, i);
120 			ret = -EINVAL;
121 			goto err;
122 		}
123 
124 		drm_gem_object_reference(&obj->base);
125 		list_add_tail(&obj->obj_exec_link, &objects);
126 	}
127 	spin_unlock(&file->table_lock);
128 
129 	i = 0;
130 	while (!list_empty(&objects)) {
131 		struct i915_vma *vma;
132 
133 		obj = list_first_entry(&objects,
134 				       struct drm_i915_gem_object,
135 				       obj_exec_link);
136 
137 		/*
138 		 * NOTE: We can leak any vmas created here when something fails
139 		 * later on. But that's no issue since vma_unbind can deal with
140 		 * vmas which are not actually bound. And since only
141 		 * lookup_or_create exists as an interface to get at the vma
142 		 * from the (obj, vm) we don't run the risk of creating
143 		 * duplicated vmas for the same vm.
144 		 */
145 		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
146 		if (IS_ERR(vma)) {
147 			DRM_DEBUG("Failed to lookup VMA\n");
148 			ret = PTR_ERR(vma);
149 			goto err;
150 		}
151 
152 		/* Transfer ownership from the objects list to the vmas list. */
153 		list_add_tail(&vma->exec_list, &eb->vmas);
154 		list_del_init(&obj->obj_exec_link);
155 
156 		vma->exec_entry = &exec[i];
157 		if (eb->and < 0) {
158 			eb->lut[i] = vma;
159 		} else {
160 			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
161 			vma->exec_handle = handle;
162 			hlist_add_head(&vma->exec_node,
163 				       &eb->buckets[handle & eb->and]);
164 		}
165 		++i;
166 	}
167 
168 	return 0;
169 
170 
171 err:
172 	while (!list_empty(&objects)) {
173 		obj = list_first_entry(&objects,
174 				       struct drm_i915_gem_object,
175 				       obj_exec_link);
176 		list_del_init(&obj->obj_exec_link);
177 		drm_gem_object_unreference(&obj->base);
178 	}
179 	/*
180 	 * Objects already transferred to the vmas list will be unreferenced by
181 	 * eb_destroy.
182 	 */
183 
184 	return ret;
185 }
186 
187 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
188 {
189 	if (eb->and < 0) {
190 		if (handle >= -eb->and)
191 			return NULL;
192 		return eb->lut[handle];
193 	} else {
194 		struct hlist_head *head;
195 		struct hlist_node *node;
196 
197 		head = &eb->buckets[handle & eb->and];
198 		hlist_for_each(node, head) {
199 			struct i915_vma *vma;
200 
201 			vma = hlist_entry(node, struct i915_vma, exec_node);
202 			if (vma->exec_handle == handle)
203 				return vma;
204 		}
205 		return NULL;
206 	}
207 }
208 
209 static void
210 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
211 {
212 	struct drm_i915_gem_exec_object2 *entry;
213 	struct drm_i915_gem_object *obj = vma->obj;
214 
215 	if (!drm_mm_node_allocated(&vma->node))
216 		return;
217 
218 	entry = vma->exec_entry;
219 
220 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
221 		i915_gem_object_unpin_fence(obj);
222 
223 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
224 		vma->pin_count--;
225 
226 	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
227 }
228 
229 static void eb_destroy(struct eb_vmas *eb)
230 {
231 	while (!list_empty(&eb->vmas)) {
232 		struct i915_vma *vma;
233 
234 		vma = list_first_entry(&eb->vmas,
235 				       struct i915_vma,
236 				       exec_list);
237 		list_del_init(&vma->exec_list);
238 		i915_gem_execbuffer_unreserve_vma(vma);
239 		drm_gem_object_unreference(&vma->obj->base);
240 	}
241 	kfree(eb);
242 }
243 
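/*
 * Writing the relocation through a CPU kmap is preferred when the object
 * is LLC-coherent, already in the CPU write domain, not reachable through
 * the mappable aperture, or not uncached; otherwise the write goes through
 * the GTT (see relocate_entry_gtt).
 */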
244 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
245 {
246 	return (HAS_LLC(obj->base.dev) ||
247 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
248 		!obj->map_and_fenceable ||
249 		obj->cache_level != I915_CACHE_NONE);
250 }
251 
252 static int
253 relocate_entry_cpu(struct drm_i915_gem_object *obj,
254 		   struct drm_i915_gem_relocation_entry *reloc,
255 		   uint64_t target_offset)
256 {
257 	struct drm_device *dev = obj->base.dev;
258 	uint32_t page_offset = offset_in_page(reloc->offset);
259 	uint64_t delta = reloc->delta + target_offset;
260 	char *vaddr;
261 	int ret;
262 
263 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
264 	if (ret)
265 		return ret;
266 
267 	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
268 				reloc->offset >> PAGE_SHIFT));
269 	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
270 
271 	if (INTEL_INFO(dev)->gen >= 8) {
272 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
273 
274 		if (page_offset == 0) {
275 			kunmap_atomic(vaddr);
276 			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
277 			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
278 		}
279 
280 		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
281 	}
282 
283 	kunmap_atomic(vaddr);
284 
285 	return 0;
286 }
287 
288 static int
289 relocate_entry_gtt(struct drm_i915_gem_object *obj,
290 		   struct drm_i915_gem_relocation_entry *reloc,
291 		   uint64_t target_offset)
292 {
293 	struct drm_device *dev = obj->base.dev;
294 	struct drm_i915_private *dev_priv = dev->dev_private;
295 	uint64_t delta = reloc->delta + target_offset;
296 	uint64_t offset;
297 	void __iomem *reloc_page;
298 	int ret;
299 
300 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
301 	if (ret)
302 		return ret;
303 
304 	ret = i915_gem_object_put_fence(obj);
305 	if (ret)
306 		return ret;
307 
308 	/* Map the page containing the relocation we're going to perform.  */
309 	offset = i915_gem_obj_ggtt_offset(obj);
310 	offset += reloc->offset;
311 	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
312 					      offset & PAGE_MASK);
313 	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
314 
315 	if (INTEL_INFO(dev)->gen >= 8) {
316 		offset += sizeof(uint32_t);
317 
318 		if (offset_in_page(offset) == 0) {
319 			io_mapping_unmap_atomic(reloc_page);
320 			reloc_page =
321 				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
322 							 offset);
323 		}
324 
325 		iowrite32(upper_32_bits(delta),
326 			  reloc_page + offset_in_page(offset));
327 	}
328 
329 	io_mapping_unmap_atomic(reloc_page);
330 
331 	return 0;
332 }
333 
334 static int
335 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
336 				   struct eb_vmas *eb,
337 				   struct drm_i915_gem_relocation_entry *reloc)
338 {
339 	struct drm_device *dev = obj->base.dev;
340 	struct drm_gem_object *target_obj;
341 	struct drm_i915_gem_object *target_i915_obj;
342 	struct i915_vma *target_vma;
343 	uint64_t target_offset;
344 	int ret;
345 
346 	/* we already hold a reference to all valid objects */
347 	target_vma = eb_get_vma(eb, reloc->target_handle);
348 	if (unlikely(target_vma == NULL))
349 		return -ENOENT;
350 	target_i915_obj = target_vma->obj;
351 	target_obj = &target_vma->obj->base;
352 
353 	target_offset = target_vma->node.start;
354 
355 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
356 	 * pipe_control writes because the gpu doesn't properly redirect them
357 	 * through the ppgtt for non-secure batchbuffers. */
358 	if (unlikely(IS_GEN6(dev) &&
359 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
360 	    !target_i915_obj->has_global_gtt_mapping)) {
361 		struct i915_vma *vma =
362 			list_first_entry(&target_i915_obj->vma_list,
363 					 typeof(*vma), vma_link);
364 		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
365 	}
366 
367 	/* Validate that the target is in a valid r/w GPU domain */
368 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
369 		DRM_DEBUG("reloc with multiple write domains: "
370 			  "obj %p target %d offset %d "
371 			  "read %08x write %08x",
372 			  obj, reloc->target_handle,
373 			  (int) reloc->offset,
374 			  reloc->read_domains,
375 			  reloc->write_domain);
376 		return -EINVAL;
377 	}
378 	if (unlikely((reloc->write_domain | reloc->read_domains)
379 		     & ~I915_GEM_GPU_DOMAINS)) {
380 		DRM_DEBUG("reloc with read/write non-GPU domains: "
381 			  "obj %p target %d offset %d "
382 			  "read %08x write %08x",
383 			  obj, reloc->target_handle,
384 			  (int) reloc->offset,
385 			  reloc->read_domains,
386 			  reloc->write_domain);
387 		return -EINVAL;
388 	}
389 
390 	target_obj->pending_read_domains |= reloc->read_domains;
391 	target_obj->pending_write_domain |= reloc->write_domain;
392 
393 	/* If the relocation already has the right value in it, no
394 	 * more work needs to be done.
395 	 */
396 	if (target_offset == reloc->presumed_offset)
397 		return 0;
398 
399 	/* Check that the relocation address is valid... */
400 	if (unlikely(reloc->offset >
401 		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
402 		DRM_DEBUG("Relocation beyond object bounds: "
403 			  "obj %p target %d offset %d size %d.\n",
404 			  obj, reloc->target_handle,
405 			  (int) reloc->offset,
406 			  (int) obj->base.size);
407 		return -EINVAL;
408 	}
409 	if (unlikely(reloc->offset & 3)) {
410 		DRM_DEBUG("Relocation not 4-byte aligned: "
411 			  "obj %p target %d offset %d.\n",
412 			  obj, reloc->target_handle,
413 			  (int) reloc->offset);
414 		return -EINVAL;
415 	}
416 
417 	/* We can't wait for rendering with pagefaults disabled */
418 	if (obj->active && in_atomic())
419 		return -EFAULT;
420 
421 	if (use_cpu_reloc(obj))
422 		ret = relocate_entry_cpu(obj, reloc, target_offset);
423 	else
424 		ret = relocate_entry_gtt(obj, reloc, target_offset);
425 
426 	if (ret)
427 		return ret;
428 
429 	/* and update the user's relocation entry */
430 	reloc->presumed_offset = target_offset;
431 
432 	return 0;
433 }
434 
435 static int
436 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
437 				 struct eb_vmas *eb)
438 {
439 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
440 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
441 	struct drm_i915_gem_relocation_entry __user *user_relocs;
442 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
443 	int remain, ret;
444 
445 	user_relocs = to_user_ptr(entry->relocs_ptr);
446 
447 	remain = entry->relocation_count;
448 	while (remain) {
449 		struct drm_i915_gem_relocation_entry *r = stack_reloc;
450 		int count = remain;
451 		if (count > ARRAY_SIZE(stack_reloc))
452 			count = ARRAY_SIZE(stack_reloc);
453 		remain -= count;
454 
455 		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
456 			return -EFAULT;
457 
458 		do {
459 			u64 offset = r->presumed_offset;
460 
461 			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
462 			if (ret)
463 				return ret;
464 
465 			if (r->presumed_offset != offset &&
466 			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
467 						    &r->presumed_offset,
468 						    sizeof(r->presumed_offset))) {
469 				return -EFAULT;
470 			}
471 
472 			user_relocs++;
473 			r++;
474 		} while (--count);
475 	}
476 
477 	return 0;
478 #undef N_RELOC
479 }
480 
481 static int
482 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
483 				      struct eb_vmas *eb,
484 				      struct drm_i915_gem_relocation_entry *relocs)
485 {
486 	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
487 	int i, ret;
488 
489 	for (i = 0; i < entry->relocation_count; i++) {
490 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
491 		if (ret)
492 			return ret;
493 	}
494 
495 	return 0;
496 }
497 
498 static int
499 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
500 {
501 	struct i915_vma *vma;
502 	int ret = 0;
503 
504 	/* This is the fast path and we cannot handle a pagefault whilst
505 	 * holding the struct mutex lest the user pass in the relocations
506 	 * contained within an mmapped bo, for in such a case the page
507 	 * fault handler would call i915_gem_fault() and we would try to
508 	 * acquire the struct mutex again. Obviously this is bad and so
509 	 * lockdep complains vehemently.
510 	 */
511 	pagefault_disable();
512 	list_for_each_entry(vma, &eb->vmas, exec_list) {
513 		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
514 		if (ret)
515 			break;
516 	}
517 	pagefault_enable();
518 
519 	return ret;
520 }
521 
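/*
 * Pin a vma for execution: translate the exec entry flags into pin flags
 * (mappable, global GTT, batch offset bias), grab a fence register if
 * requested, and request a relocation pass if the object did not end up
 * at the offset userspace presumed.
 */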
522 static int
523 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
524 				struct intel_engine_cs *ring,
525 				bool *need_reloc)
526 {
527 	struct drm_i915_gem_object *obj = vma->obj;
528 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
529 	uint64_t flags;
530 	int ret;
531 
532 	flags = 0;
533 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
534 		flags |= PIN_MAPPABLE;
535 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
536 		flags |= PIN_GLOBAL;
537 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
538 		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
539 
540 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
541 	if (ret)
542 		return ret;
543 
544 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
545 
546 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
547 		ret = i915_gem_object_get_fence(obj);
548 		if (ret)
549 			return ret;
550 
551 		if (i915_gem_object_pin_fence(obj))
552 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
553 	}
554 
555 	if (entry->offset != vma->node.start) {
556 		entry->offset = vma->node.start;
557 		*need_reloc = true;
558 	}
559 
560 	if (entry->flags & EXEC_OBJECT_WRITE) {
561 		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
562 		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
563 	}
564 
565 	return 0;
566 }
567 
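/*
 * Relocation writes that cannot use the CPU path go through the mappable
 * aperture, so a GGTT vma with pending relocations must be reserved as
 * mappable.
 */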
568 static bool
569 need_reloc_mappable(struct i915_vma *vma)
570 {
571 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
572 
573 	if (entry->relocation_count == 0)
574 		return false;
575 
576 	if (!i915_is_ggtt(vma->vm))
577 		return false;
578 
579 	/* See also use_cpu_reloc() */
580 	if (HAS_LLC(vma->obj->base.dev))
581 		return false;
582 
583 	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
584 		return false;
585 
586 	return true;
587 }
588 
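/*
 * Check whether an already-bound vma violates the placement constraints
 * requested for this execbuffer: alignment, the need to sit in the
 * mappable aperture, or the minimum batch offset bias.
 */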
589 static bool
590 eb_vma_misplaced(struct i915_vma *vma)
591 {
592 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
593 	struct drm_i915_gem_object *obj = vma->obj;
594 
595 	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
596 	       !i915_is_ggtt(vma->vm));
597 
598 	if (entry->alignment &&
599 	    vma->node.start & (entry->alignment - 1))
600 		return true;
601 
602 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
603 		return true;
604 
605 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
606 	    vma->node.start < BATCH_OFFSET_BIAS)
607 		return true;
608 
609 	return false;
610 }
611 
612 static int
613 i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
614 			    struct list_head *vmas,
615 			    bool *need_relocs)
616 {
617 	struct drm_i915_gem_object *obj;
618 	struct i915_vma *vma;
619 	struct i915_address_space *vm;
620 	struct list_head ordered_vmas;
621 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
622 	int retry;
623 
624 	i915_gem_retire_requests_ring(ring);
625 
626 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
627 
628 	INIT_LIST_HEAD(&ordered_vmas);
629 	while (!list_empty(vmas)) {
630 		struct drm_i915_gem_exec_object2 *entry;
631 		bool need_fence, need_mappable;
632 
633 		vma = list_first_entry(vmas, struct i915_vma, exec_list);
634 		obj = vma->obj;
635 		entry = vma->exec_entry;
636 
637 		if (!has_fenced_gpu_access)
638 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
639 		need_fence =
640 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
641 			obj->tiling_mode != I915_TILING_NONE;
642 		need_mappable = need_fence || need_reloc_mappable(vma);
643 
644 		if (need_mappable) {
645 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
646 			list_move(&vma->exec_list, &ordered_vmas);
647 		} else
648 			list_move_tail(&vma->exec_list, &ordered_vmas);
649 
650 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
651 		obj->base.pending_write_domain = 0;
652 	}
653 	list_splice(&ordered_vmas, vmas);
654 
655 	/* Attempt to pin all of the buffers into the GTT.
656 	 * This is done in 3 phases:
657 	 *
658 	 * 1a. Unbind all objects that do not match the GTT constraints for
659 	 *     the execbuffer (fenceable, mappable, alignment etc).
660 	 * 1b. Increment pin count for already bound objects.
661 	 * 2.  Bind new objects.
662 	 * 3.  Decrement pin count.
663 	 *
664 	 * This avoids unnecessary unbinding of later objects in order to make
665 	 * room for the earlier objects *unless* we need to defragment.
666 	 */
667 	retry = 0;
668 	do {
669 		int ret = 0;
670 
671 		/* Unbind any ill-fitting objects or pin. */
672 		list_for_each_entry(vma, vmas, exec_list) {
673 			if (!drm_mm_node_allocated(&vma->node))
674 				continue;
675 
676 			if (eb_vma_misplaced(vma))
677 				ret = i915_vma_unbind(vma);
678 			else
679 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
680 			if (ret)
681 				goto err;
682 		}
683 
684 		/* Bind fresh objects */
685 		list_for_each_entry(vma, vmas, exec_list) {
686 			if (drm_mm_node_allocated(&vma->node))
687 				continue;
688 
689 			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
690 			if (ret)
691 				goto err;
692 		}
693 
694 err:
695 		if (ret != -ENOSPC || retry++)
696 			return ret;
697 
698 		/* Decrement pin count for bound objects */
699 		list_for_each_entry(vma, vmas, exec_list)
700 			i915_gem_execbuffer_unreserve_vma(vma);
701 
702 		ret = i915_gem_evict_vm(vm, true);
703 		if (ret)
704 			return ret;
705 	} while (1);
706 }
707 
708 static int
709 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
710 				  struct drm_i915_gem_execbuffer2 *args,
711 				  struct drm_file *file,
712 				  struct intel_engine_cs *ring,
713 				  struct eb_vmas *eb,
714 				  struct drm_i915_gem_exec_object2 *exec)
715 {
716 	struct drm_i915_gem_relocation_entry *reloc;
717 	struct i915_address_space *vm;
718 	struct i915_vma *vma;
719 	bool need_relocs;
720 	int *reloc_offset;
721 	int i, total, ret;
722 	unsigned count = args->buffer_count;
723 
724 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
725 
726 	/* We may process another execbuffer during the unlock... */
727 	while (!list_empty(&eb->vmas)) {
728 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
729 		list_del_init(&vma->exec_list);
730 		i915_gem_execbuffer_unreserve_vma(vma);
731 		drm_gem_object_unreference(&vma->obj->base);
732 	}
733 
734 	mutex_unlock(&dev->struct_mutex);
735 
736 	total = 0;
737 	for (i = 0; i < count; i++)
738 		total += exec[i].relocation_count;
739 
740 	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
741 	reloc = drm_malloc_ab(total, sizeof(*reloc));
742 	if (reloc == NULL || reloc_offset == NULL) {
743 		drm_free_large(reloc);
744 		drm_free_large(reloc_offset);
745 		mutex_lock(&dev->struct_mutex);
746 		return -ENOMEM;
747 	}
748 
749 	total = 0;
750 	for (i = 0; i < count; i++) {
751 		struct drm_i915_gem_relocation_entry __user *user_relocs;
752 		u64 invalid_offset = (u64)-1;
753 		int j;
754 
755 		user_relocs = to_user_ptr(exec[i].relocs_ptr);
756 
757 		if (copy_from_user(reloc+total, user_relocs,
758 				   exec[i].relocation_count * sizeof(*reloc))) {
759 			ret = -EFAULT;
760 			mutex_lock(&dev->struct_mutex);
761 			goto err;
762 		}
763 
764 		/* As we do not update the known relocation offsets after
765 		 * relocating (due to the complexities in lock handling),
766 		 * we need to mark them as invalid now so that we force the
767 		 * relocation processing next time. Just in case the target
768 		 * object is evicted and then rebound into its old
769 		 * presumed_offset before the next execbuffer - if that
770 		 * happened we would make the mistake of assuming that the
771 		 * relocations were valid.
772 		 */
773 		for (j = 0; j < exec[i].relocation_count; j++) {
774 			if (__copy_to_user(&user_relocs[j].presumed_offset,
775 					   &invalid_offset,
776 					   sizeof(invalid_offset))) {
777 				ret = -EFAULT;
778 				mutex_lock(&dev->struct_mutex);
779 				goto err;
780 			}
781 		}
782 
783 		reloc_offset[i] = total;
784 		total += exec[i].relocation_count;
785 	}
786 
787 	ret = i915_mutex_lock_interruptible(dev);
788 	if (ret) {
789 		mutex_lock(&dev->struct_mutex);
790 		goto err;
791 	}
792 
793 	/* reacquire the objects */
794 	eb_reset(eb);
795 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
796 	if (ret)
797 		goto err;
798 
799 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
800 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
801 	if (ret)
802 		goto err;
803 
804 	list_for_each_entry(vma, &eb->vmas, exec_list) {
805 		int offset = vma->exec_entry - exec;
806 		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
807 							    reloc + reloc_offset[offset]);
808 		if (ret)
809 			goto err;
810 	}
811 
812 	/* Leave the user relocations as they are; this is the painfully slow path,
813 	 * and we want to avoid the complication of dropping the lock whilst
814 	 * having buffers reserved in the aperture and so causing spurious
815 	 * ENOSPC for random operations.
816 	 */
817 
818 err:
819 	drm_free_large(reloc);
820 	drm_free_large(reloc_offset);
821 	return ret;
822 }
823 
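/*
 * Make all objects coherent for the GPU before dispatch: synchronise
 * against rendering on other rings, clflush CPU-dirty objects, flush the
 * chipset and GTT write buffers, then invalidate the ring's caches.
 */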
824 static int
825 i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
826 				struct list_head *vmas)
827 {
828 	struct i915_vma *vma;
829 	uint32_t flush_domains = 0;
830 	bool flush_chipset = false;
831 	int ret;
832 
833 	list_for_each_entry(vma, vmas, exec_list) {
834 		struct drm_i915_gem_object *obj = vma->obj;
835 		ret = i915_gem_object_sync(obj, ring);
836 		if (ret)
837 			return ret;
838 
839 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
840 			flush_chipset |= i915_gem_clflush_object(obj, false);
841 
842 		flush_domains |= obj->base.write_domain;
843 	}
844 
845 	if (flush_chipset)
846 		i915_gem_chipset_flush(ring->dev);
847 
848 	if (flush_domains & I915_GEM_DOMAIN_GTT)
849 		wmb();
850 
851 	/* Unconditionally invalidate gpu caches and ensure that we do flush
852 	 * any residual writes from the previous batch.
853 	 */
854 	return intel_ring_invalidate_all_caches(ring);
855 }
856 
857 static bool
858 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
859 {
860 	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
861 		return false;
862 
863 	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
864 }
865 
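/*
 * Sanity-check the user-supplied exec objects before taking any locks:
 * reject unknown per-object flags, guard against overflow of the total
 * relocation count, and verify (and prefault) the relocation arrays so
 * that the relocation paths can write presumed offsets back.
 */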
866 static int
867 validate_exec_list(struct drm_device *dev,
868 		   struct drm_i915_gem_exec_object2 *exec,
869 		   int count)
870 {
871 	unsigned relocs_total = 0;
872 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
873 	unsigned invalid_flags;
874 	int i;
875 
876 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
877 	if (USES_FULL_PPGTT(dev))
878 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
879 
880 	for (i = 0; i < count; i++) {
881 		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
882 		int length; /* limited by fault_in_pages_readable() */
883 
884 		if (exec[i].flags & invalid_flags)
885 			return -EINVAL;
886 
887 		/* First check for malicious input causing overflow in
888 		 * the worst case where we need to allocate the entire
889 		 * relocation tree as a single array.
890 		 */
891 		if (exec[i].relocation_count > relocs_max - relocs_total)
892 			return -EINVAL;
893 		relocs_total += exec[i].relocation_count;
894 
895 		length = exec[i].relocation_count *
896 			sizeof(struct drm_i915_gem_relocation_entry);
897 		/*
898 		 * We must check that the entire relocation array is safe
899 		 * to read, but since we may need to update the presumed
900 		 * offsets during execution, check for full write access.
901 		 */
902 		if (!access_ok(VERIFY_WRITE, ptr, length))
903 			return -EFAULT;
904 
905 		if (likely(!i915.prefault_disable)) {
906 			if (fault_in_multipages_readable(ptr, length))
907 				return -EFAULT;
908 		}
909 	}
910 
911 	return 0;
912 }
913 
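/*
 * Resolve and validate the context for this submission: non-default
 * contexts are only honoured on the render ring, banned contexts are
 * rejected with -EIO, and under execlists the logical ring context is
 * created on first use.
 */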
914 static struct intel_context *
915 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
916 			  struct intel_engine_cs *ring, const u32 ctx_id)
917 {
918 	struct intel_context *ctx = NULL;
919 	struct i915_ctx_hang_stats *hs;
920 
921 	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
922 		return ERR_PTR(-EINVAL);
923 
924 	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
925 	if (IS_ERR(ctx))
926 		return ctx;
927 
928 	hs = &ctx->hang_stats;
929 	if (hs->banned) {
930 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
931 		return ERR_PTR(-EIO);
932 	}
933 
934 	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
935 		int ret = intel_lr_context_deferred_create(ctx, ring);
936 		if (ret) {
937 			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
938 			return ERR_PTR(ret);
939 		}
940 	}
941 
942 	return ctx;
943 }
944 
945 void
946 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
947 				   struct intel_engine_cs *ring)
948 {
949 	u32 seqno = intel_ring_get_seqno(ring);
950 	struct i915_vma *vma;
951 
952 	list_for_each_entry(vma, vmas, exec_list) {
953 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
954 		struct drm_i915_gem_object *obj = vma->obj;
955 		u32 old_read = obj->base.read_domains;
956 		u32 old_write = obj->base.write_domain;
957 
958 		obj->base.write_domain = obj->base.pending_write_domain;
959 		if (obj->base.write_domain == 0)
960 			obj->base.pending_read_domains |= obj->base.read_domains;
961 		obj->base.read_domains = obj->base.pending_read_domains;
962 
963 		i915_vma_move_to_active(vma, ring);
964 		if (obj->base.write_domain) {
965 			obj->dirty = 1;
966 			obj->last_write_seqno = seqno;
967 
968 			intel_fb_obj_invalidate(obj, ring);
969 
970 			/* update for the implicit flush after a batch */
971 			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
972 		}
973 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
974 			obj->last_fenced_seqno = seqno;
975 			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
976 				struct drm_i915_private *dev_priv = to_i915(ring->dev);
977 				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
978 					       &dev_priv->mm.fence_list);
979 			}
980 		}
981 
982 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
983 	}
984 }
985 
986 void
987 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
988 				    struct drm_file *file,
989 				    struct intel_engine_cs *ring,
990 				    struct drm_i915_gem_object *obj)
991 {
992 	/* Unconditionally force add_request to emit a full flush. */
993 	ring->gpu_caches_dirty = true;
994 
995 	/* Add a breadcrumb for the completion of the batch buffer */
996 	(void)__i915_add_request(ring, file, obj, NULL);
997 }
998 
999 static int
1000 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1001 			    struct intel_engine_cs *ring)
1002 {
1003 	struct drm_i915_private *dev_priv = dev->dev_private;
1004 	int ret, i;
1005 
1006 	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1007 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1008 		return -EINVAL;
1009 	}
1010 
1011 	ret = intel_ring_begin(ring, 4 * 3);
1012 	if (ret)
1013 		return ret;
1014 
1015 	for (i = 0; i < 4; i++) {
1016 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1017 		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1018 		intel_ring_emit(ring, 0);
1019 	}
1020 
1021 	intel_ring_advance(ring);
1022 
1023 	return 0;
1024 }
1025 
1026 int
1027 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
1028 			       struct intel_engine_cs *ring,
1029 			       struct intel_context *ctx,
1030 			       struct drm_i915_gem_execbuffer2 *args,
1031 			       struct list_head *vmas,
1032 			       struct drm_i915_gem_object *batch_obj,
1033 			       u64 exec_start, u32 flags)
1034 {
1035 	struct drm_clip_rect *cliprects = NULL;
1036 	struct drm_i915_private *dev_priv = dev->dev_private;
1037 	u64 exec_len;
1038 	int instp_mode;
1039 	u32 instp_mask;
1040 	int i, ret = 0;
1041 
1042 	if (args->num_cliprects != 0) {
1043 		if (ring != &dev_priv->ring[RCS]) {
1044 			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1045 			return -EINVAL;
1046 		}
1047 
1048 		if (INTEL_INFO(dev)->gen >= 5) {
1049 			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1050 			return -EINVAL;
1051 		}
1052 
1053 		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1054 			DRM_DEBUG("execbuf with %u cliprects\n",
1055 				  args->num_cliprects);
1056 			return -EINVAL;
1057 		}
1058 
1059 		cliprects = kcalloc(args->num_cliprects,
1060 				    sizeof(*cliprects),
1061 				    GFP_KERNEL);
1062 		if (cliprects == NULL) {
1063 			ret = -ENOMEM;
1064 			goto error;
1065 		}
1066 
1067 		if (copy_from_user(cliprects,
1068 				   to_user_ptr(args->cliprects_ptr),
1069 				   sizeof(*cliprects)*args->num_cliprects)) {
1070 			ret = -EFAULT;
1071 			goto error;
1072 		}
1073 	} else {
1074 		if (args->DR4 == 0xffffffff) {
1075 			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1076 			args->DR4 = 0;
1077 		}
1078 
1079 		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1080 			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1081 			return -EINVAL;
1082 		}
1083 	}
1084 
1085 	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1086 	if (ret)
1087 		goto error;
1088 
1089 	ret = i915_switch_context(ring, ctx);
1090 	if (ret)
1091 		goto error;
1092 
1093 	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1094 	instp_mask = I915_EXEC_CONSTANTS_MASK;
1095 	switch (instp_mode) {
1096 	case I915_EXEC_CONSTANTS_REL_GENERAL:
1097 	case I915_EXEC_CONSTANTS_ABSOLUTE:
1098 	case I915_EXEC_CONSTANTS_REL_SURFACE:
1099 		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1100 			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1101 			ret = -EINVAL;
1102 			goto error;
1103 		}
1104 
1105 		if (instp_mode != dev_priv->relative_constants_mode) {
1106 			if (INTEL_INFO(dev)->gen < 4) {
1107 				DRM_DEBUG("no rel constants on pre-gen4\n");
1108 				ret = -EINVAL;
1109 				goto error;
1110 			}
1111 
1112 			if (INTEL_INFO(dev)->gen > 5 &&
1113 			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1114 				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1115 				ret = -EINVAL;
1116 				goto error;
1117 			}
1118 
1119 			/* The HW changed the meaning of this bit on gen6 */
1120 			if (INTEL_INFO(dev)->gen >= 6)
1121 				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1122 		}
1123 		break;
1124 	default:
1125 		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1126 		ret = -EINVAL;
1127 		goto error;
1128 	}
1129 
1130 	if (ring == &dev_priv->ring[RCS] &&
1131 			instp_mode != dev_priv->relative_constants_mode) {
1132 		ret = intel_ring_begin(ring, 4);
1133 		if (ret)
1134 			goto error;
1135 
1136 		intel_ring_emit(ring, MI_NOOP);
1137 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1138 		intel_ring_emit(ring, INSTPM);
1139 		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1140 		intel_ring_advance(ring);
1141 
1142 		dev_priv->relative_constants_mode = instp_mode;
1143 	}
1144 
1145 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1146 		ret = i915_reset_gen7_sol_offsets(dev, ring);
1147 		if (ret)
1148 			goto error;
1149 	}
1150 
1151 	exec_len = args->batch_len;
1152 	if (cliprects) {
1153 		for (i = 0; i < args->num_cliprects; i++) {
1154 			ret = i915_emit_box(dev, &cliprects[i],
1155 					    args->DR1, args->DR4);
1156 			if (ret)
1157 				goto error;
1158 
1159 			ret = ring->dispatch_execbuffer(ring,
1160 							exec_start, exec_len,
1161 							flags);
1162 			if (ret)
1163 				goto error;
1164 		}
1165 	} else {
1166 		ret = ring->dispatch_execbuffer(ring,
1167 						exec_start, exec_len,
1168 						flags);
1169 		if (ret)
1170 			return ret;
1171 	}
1172 
1173 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1174 
1175 	i915_gem_execbuffer_move_to_active(vmas, ring);
1176 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1177 
1178 error:
1179 	kfree(cliprects);
1180 	return ret;
1181 }
1182 
1183 /**
1184  * Find a BSD ring on which to dispatch the corresponding BSD command.
1185  * The ring ID is returned.
1186  */
1187 static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1188 				  struct drm_file *file)
1189 {
1190 	struct drm_i915_private *dev_priv = dev->dev_private;
1191 	struct drm_i915_file_private *file_priv = file->driver_priv;
1192 
1193 	/* Check whether the file_priv is using one ring */
1194 	if (file_priv->bsd_ring)
1195 		return file_priv->bsd_ring->id;
1196 	else {
1197 		/* If not, use the ping-pong mechanism to select one ring */
1198 		int ring_id;
1199 
1200 		mutex_lock(&dev->struct_mutex);
1201 		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1202 			ring_id = VCS;
1203 			dev_priv->mm.bsd_ring_dispatch_index = 1;
1204 		} else {
1205 			ring_id = VCS2;
1206 			dev_priv->mm.bsd_ring_dispatch_index = 0;
1207 		}
1208 		file_priv->bsd_ring = &dev_priv->ring[ring_id];
1209 		mutex_unlock(&dev->struct_mutex);
1210 		return ring_id;
1211 	}
1212 }
1213 
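/*
 * The batch buffer is always the last object in the execbuffer list,
 * i.e. the last vma added to eb->vmas.
 */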
1214 static struct drm_i915_gem_object *
1215 eb_get_batch(struct eb_vmas *eb)
1216 {
1217 	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1218 
1219 	/*
1220 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1221 	 * to negative relocation deltas. Usually that works out ok since the
1222 	 * relocate address is still positive, except when the batch is placed
1223 	 * very low in the GTT. Ensure this doesn't happen.
1224 	 *
1225 	 * Note that actual hangs have only been observed on gen7, but for
1226 	 * paranoia do it everywhere.
1227 	 */
1228 	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1229 
1230 	return vma->obj;
1231 }
1232 
1233 static int
1234 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1235 		       struct drm_file *file,
1236 		       struct drm_i915_gem_execbuffer2 *args,
1237 		       struct drm_i915_gem_exec_object2 *exec)
1238 {
1239 	struct drm_i915_private *dev_priv = dev->dev_private;
1240 	struct eb_vmas *eb;
1241 	struct drm_i915_gem_object *batch_obj;
1242 	struct intel_engine_cs *ring;
1243 	struct intel_context *ctx;
1244 	struct i915_address_space *vm;
1245 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1246 	u64 exec_start = args->batch_start_offset;
1247 	u32 flags;
1248 	int ret;
1249 	bool need_relocs;
1250 
1251 	if (!i915_gem_check_execbuffer(args))
1252 		return -EINVAL;
1253 
1254 	ret = validate_exec_list(dev, exec, args->buffer_count);
1255 	if (ret)
1256 		return ret;
1257 
1258 	flags = 0;
1259 	if (args->flags & I915_EXEC_SECURE) {
1260 		if (!file->is_master || !capable(CAP_SYS_ADMIN))
1261 		    return -EPERM;
1262 
1263 		flags |= I915_DISPATCH_SECURE;
1264 	}
1265 	if (args->flags & I915_EXEC_IS_PINNED)
1266 		flags |= I915_DISPATCH_PINNED;
1267 
1268 	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
1269 		DRM_DEBUG("execbuf with unknown ring: %d\n",
1270 			  (int)(args->flags & I915_EXEC_RING_MASK));
1271 		return -EINVAL;
1272 	}
1273 
1274 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1275 		ring = &dev_priv->ring[RCS];
1276 	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1277 		if (HAS_BSD2(dev)) {
1278 			int ring_id;
1279 			ring_id = gen8_dispatch_bsd_ring(dev, file);
1280 			ring = &dev_priv->ring[ring_id];
1281 		} else
1282 			ring = &dev_priv->ring[VCS];
1283 	} else
1284 		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1285 
1286 	if (!intel_ring_initialized(ring)) {
1287 		DRM_DEBUG("execbuf with invalid ring: %d\n",
1288 			  (int)(args->flags & I915_EXEC_RING_MASK));
1289 		return -EINVAL;
1290 	}
1291 
1292 	if (args->buffer_count < 1) {
1293 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1294 		return -EINVAL;
1295 	}
1296 
1297 	intel_runtime_pm_get(dev_priv);
1298 
1299 	ret = i915_mutex_lock_interruptible(dev);
1300 	if (ret)
1301 		goto pre_mutex_err;
1302 
1303 	if (dev_priv->ums.mm_suspended) {
1304 		mutex_unlock(&dev->struct_mutex);
1305 		ret = -EBUSY;
1306 		goto pre_mutex_err;
1307 	}
1308 
1309 	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1310 	if (IS_ERR(ctx)) {
1311 		mutex_unlock(&dev->struct_mutex);
1312 		ret = PTR_ERR(ctx);
1313 		goto pre_mutex_err;
1314 	}
1315 
1316 	i915_gem_context_reference(ctx);
1317 
1318 	if (ctx->ppgtt)
1319 		vm = &ctx->ppgtt->base;
1320 	else
1321 		vm = &dev_priv->gtt.base;
1322 
1323 	eb = eb_create(args);
1324 	if (eb == NULL) {
1325 		i915_gem_context_unreference(ctx);
1326 		mutex_unlock(&dev->struct_mutex);
1327 		ret = -ENOMEM;
1328 		goto pre_mutex_err;
1329 	}
1330 
1331 	/* Look up object handles */
1332 	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1333 	if (ret)
1334 		goto err;
1335 
1336 	/* take note of the batch buffer before we might reorder the lists */
1337 	batch_obj = eb_get_batch(eb);
1338 
1339 	/* Move the objects en-masse into the GTT, evicting if necessary. */
1340 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1341 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1342 	if (ret)
1343 		goto err;
1344 
1345 	/* The objects are in their final locations, apply the relocations. */
1346 	if (need_relocs)
1347 		ret = i915_gem_execbuffer_relocate(eb);
1348 	if (ret) {
1349 		if (ret == -EFAULT) {
1350 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1351 								eb, exec);
1352 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1353 		}
1354 		if (ret)
1355 			goto err;
1356 	}
1357 
1358 	/* Set the pending read domains for the batch buffer to COMMAND */
1359 	if (batch_obj->base.pending_write_domain) {
1360 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1361 		ret = -EINVAL;
1362 		goto err;
1363 	}
1364 	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1365 
1366 	if (i915_needs_cmd_parser(ring)) {
1367 		ret = i915_parse_cmds(ring,
1368 				      batch_obj,
1369 				      args->batch_start_offset,
1370 				      file->is_master);
1371 		if (ret)
1372 			goto err;
1373 
1374 		/*
1375 		 * XXX: Actually do this when enabling batch copy...
1376 		 *
1377 		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1378 		 * from MI_BATCH_BUFFER_START commands issued in the
1379 		 * dispatch_execbuffer implementations. We specifically don't
1380 		 * want that set when the command parser is enabled.
1381 		 */
1382 	}
1383 
1384 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1385 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1386 	 * hsw should have this fixed, but bdw mucks it up again. */
1387 	if (flags & I915_DISPATCH_SECURE) {
1388 		/*
1389 		 * So on first glance it looks freaky that we pin the batch here
1390 		 * outside of the reservation loop. But:
1391 		 * - The batch is already pinned into the relevant ppgtt, so we
1392 		 *   already have the backing storage fully allocated.
1393 		 * - No other BO uses the global gtt (well contexts, but meh),
1394 		 *   so we don't really have issues with multiple objects not
1395 		 *   fitting due to fragmentation.
1396 		 * So this is actually safe.
1397 		 */
1398 		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1399 		if (ret)
1400 			goto err;
1401 
1402 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1403 	} else
1404 		exec_start += i915_gem_obj_offset(batch_obj, vm);
1405 
1406 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
1407 				      &eb->vmas, batch_obj, exec_start, flags);
1408 
1409 	/*
1410 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1411 	 * batch vma for correctness. For less ugliness and fragility this
1412 	 * needs to be adjusted to also track the ggtt batch vma properly as
1413 	 * active.
1414 	 */
1415 	if (flags & I915_DISPATCH_SECURE)
1416 		i915_gem_object_ggtt_unpin(batch_obj);
1417 err:
1418 	/* the request owns the ref now */
1419 	i915_gem_context_unreference(ctx);
1420 	eb_destroy(eb);
1421 
1422 	mutex_unlock(&dev->struct_mutex);
1423 
1424 pre_mutex_err:
1425 	/* intel_gpu_busy should also get a ref, so it will free when the device
1426 	 * is really idle. */
1427 	intel_runtime_pm_put(dev_priv);
1428 	return ret;
1429 }
1430 
1431 /*
1432  * Legacy execbuffer just creates an exec2 list from the original exec object
1433  * list array and passes it to the real function.
1434  */
1435 int
1436 i915_gem_execbuffer(struct drm_device *dev, void *data,
1437 		    struct drm_file *file)
1438 {
1439 	struct drm_i915_gem_execbuffer *args = data;
1440 	struct drm_i915_gem_execbuffer2 exec2;
1441 	struct drm_i915_gem_exec_object *exec_list = NULL;
1442 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1443 	int ret, i;
1444 
1445 	if (args->buffer_count < 1) {
1446 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1447 		return -EINVAL;
1448 	}
1449 
1450 	/* Copy in the exec list from userland */
1451 	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1452 	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1453 	if (exec_list == NULL || exec2_list == NULL) {
1454 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1455 			  args->buffer_count);
1456 		drm_free_large(exec_list);
1457 		drm_free_large(exec2_list);
1458 		return -ENOMEM;
1459 	}
1460 	ret = copy_from_user(exec_list,
1461 			     to_user_ptr(args->buffers_ptr),
1462 			     sizeof(*exec_list) * args->buffer_count);
1463 	if (ret != 0) {
1464 		DRM_DEBUG("copy %d exec entries failed %d\n",
1465 			  args->buffer_count, ret);
1466 		drm_free_large(exec_list);
1467 		drm_free_large(exec2_list);
1468 		return -EFAULT;
1469 	}
1470 
1471 	for (i = 0; i < args->buffer_count; i++) {
1472 		exec2_list[i].handle = exec_list[i].handle;
1473 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1474 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1475 		exec2_list[i].alignment = exec_list[i].alignment;
1476 		exec2_list[i].offset = exec_list[i].offset;
1477 		if (INTEL_INFO(dev)->gen < 4)
1478 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1479 		else
1480 			exec2_list[i].flags = 0;
1481 	}
1482 
1483 	exec2.buffers_ptr = args->buffers_ptr;
1484 	exec2.buffer_count = args->buffer_count;
1485 	exec2.batch_start_offset = args->batch_start_offset;
1486 	exec2.batch_len = args->batch_len;
1487 	exec2.DR1 = args->DR1;
1488 	exec2.DR4 = args->DR4;
1489 	exec2.num_cliprects = args->num_cliprects;
1490 	exec2.cliprects_ptr = args->cliprects_ptr;
1491 	exec2.flags = I915_EXEC_RENDER;
1492 	i915_execbuffer2_set_context_id(exec2, 0);
1493 
1494 	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1495 	if (!ret) {
1496 		struct drm_i915_gem_exec_object __user *user_exec_list =
1497 			to_user_ptr(args->buffers_ptr);
1498 
1499 		/* Copy the new buffer offsets back to the user's exec list. */
1500 		for (i = 0; i < args->buffer_count; i++) {
1501 			ret = __copy_to_user(&user_exec_list[i].offset,
1502 					     &exec2_list[i].offset,
1503 					     sizeof(user_exec_list[i].offset));
1504 			if (ret) {
1505 				ret = -EFAULT;
1506 				DRM_DEBUG("failed to copy %d exec entries "
1507 					  "back to user (%d)\n",
1508 					  args->buffer_count, ret);
1509 				break;
1510 			}
1511 		}
1512 	}
1513 
1514 	drm_free_large(exec_list);
1515 	drm_free_large(exec2_list);
1516 	return ret;
1517 }
1518 
1519 int
1520 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1521 		     struct drm_file *file)
1522 {
1523 	struct drm_i915_gem_execbuffer2 *args = data;
1524 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1525 	int ret;
1526 
1527 	if (args->buffer_count < 1 ||
1528 	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1529 		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1530 		return -EINVAL;
1531 	}
1532 
1533 	if (args->rsvd2 != 0) {
1534 		DRM_DEBUG("dirty rsvd2 field\n");
1535 		return -EINVAL;
1536 	}
1537 
1538 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1539 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1540 	if (exec2_list == NULL)
1541 		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1542 					   args->buffer_count);
1543 	if (exec2_list == NULL) {
1544 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1545 			  args->buffer_count);
1546 		return -ENOMEM;
1547 	}
1548 	ret = copy_from_user(exec2_list,
1549 			     to_user_ptr(args->buffers_ptr),
1550 			     sizeof(*exec2_list) * args->buffer_count);
1551 	if (ret != 0) {
1552 		DRM_DEBUG("copy %d exec entries failed %d\n",
1553 			  args->buffer_count, ret);
1554 		drm_free_large(exec2_list);
1555 		return -EFAULT;
1556 	}
1557 
1558 	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1559 	if (!ret) {
1560 		/* Copy the new buffer offsets back to the user's exec list. */
1561 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1562 				   to_user_ptr(args->buffers_ptr);
1563 		int i;
1564 
1565 		for (i = 0; i < args->buffer_count; i++) {
1566 			ret = __copy_to_user(&user_exec_list[i].offset,
1567 					     &exec2_list[i].offset,
1568 					     sizeof(user_exec_list[i].offset));
1569 			if (ret) {
1570 				ret = -EFAULT;
1571 				DRM_DEBUG("failed to copy %d exec entries "
1572 					  "back to user\n",
1573 					  args->buffer_count);
1574 				break;
1575 			}
1576 		}
1577 	}
1578 
1579 	drm_free_large(exec2_list);
1580 	return ret;
1581 }
1582