1 /**************************************************************************
2  *
3  * Copyright © 2007 Red Hat Inc.
4  * Copyright © 2007-2012 Intel Corporation
5  * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sub license, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * The above copyright notice and this permission notice (including the
25  * next paragraph) shall be included in all copies or substantial portions
26  * of the Software.
27  *
28  *
29  **************************************************************************/
30 /*
31  * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32  *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33  *	    Eric Anholt <eric@anholt.net>
34  *	    Dave Airlie <airlied@linux.ie>
35  */
36 
37 #include <xf86drm.h>
38 #include <xf86atomic.h>
39 #include <fcntl.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <assert.h>
45 #include <pthread.h>
46 #include <sys/ioctl.h>
47 #include <sys/stat.h>
48 #include <sys/types.h>
49 #include <stdbool.h>
50 
51 #include "errno.h"
52 #ifndef ETIME
53 #define ETIME ETIMEDOUT
54 #endif
55 #include "libdrm_macros.h"
56 #include "libdrm_lists.h"
57 #include "intel_bufmgr.h"
58 #include "intel_bufmgr_priv.h"
59 #include "intel_chipset.h"
60 #include "string.h"
61 
62 #include "i915_drm.h"
63 #include "uthash.h"
64 
65 #if HAVE_VALGRIND
66 #include <valgrind.h>
67 #include <memcheck.h>
68 #define VG(x) x
69 #else
70 #define VG(x)
71 #endif
72 
73 #define memclear(s) memset(&s, 0, sizeof(s))
74 
75 #define DBG(...) do {					\
76 	if (bufmgr_gem->bufmgr.debug)			\
77 		fprintf(stderr, __VA_ARGS__);		\
78 } while (0)
79 
80 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
81 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
82 
83 /**
84  * upper_32_bits - return bits 32-63 of a number
85  * @n: the number we're accessing
86  *
87  * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
88  * the "right shift count >= width of type" warning when that quantity is
89  * 32-bits.
90  */
91 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
92 
93 /**
94  * lower_32_bits - return bits 0-31 of a number
95  * @n: the number we're accessing
96  */
97 #define lower_32_bits(n) ((__u32)(n))
98 
99 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
100 
101 struct drm_intel_gem_bo_bucket {
102 	drmMMListHead head;
103 	unsigned long size;
104 };
105 
106 typedef struct _drm_intel_bufmgr_gem {
107 	drm_intel_bufmgr bufmgr;
108 
109 	atomic_t refcount;
110 
111 	int fd;
112 
113 	int max_relocs;
114 
115 	pthread_mutex_t lock;
116 
117 	struct drm_i915_gem_exec_object2 *exec2_objects;
118 	drm_intel_bo **exec_bos;
119 	int exec_size;
120 	int exec_count;
121 
122 	/** Array of lists of cached gem objects of power-of-two sizes */
123 	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
124 	int num_buckets;
125 	time_t time;
126 
127 	drmMMListHead managers;
128 
129 	drm_intel_bo_gem *name_table;
130 	drm_intel_bo_gem *handle_table;
131 
132 	drmMMListHead vma_cache;
133 	int vma_count, vma_open, vma_max;
134 
135 	uint64_t gtt_size;
136 	int available_fences;
137 	int pci_device;
138 	int gen;
139 	unsigned int has_bsd : 1;
140 	unsigned int has_blt : 1;
141 	unsigned int has_relaxed_fencing : 1;
142 	unsigned int has_llc : 1;
143 	unsigned int has_wait_timeout : 1;
144 	unsigned int bo_reuse : 1;
145 	unsigned int no_exec : 1;
146 	unsigned int has_vebox : 1;
147 	unsigned int has_exec_async : 1;
148 	bool fenced_relocs;
149 
150 	struct {
151 		void *ptr;
152 		uint32_t handle;
153 	} userptr_active;
154 
155 } drm_intel_bufmgr_gem;
156 
157 #define DRM_INTEL_RELOC_FENCE (1<<0)
158 
159 typedef struct _drm_intel_reloc_target_info {
160 	drm_intel_bo *bo;
161 	int flags;
162 } drm_intel_reloc_target;
163 
164 struct _drm_intel_bo_gem {
165 	drm_intel_bo bo;
166 
167 	atomic_t refcount;
168 	uint32_t gem_handle;
169 	const char *name;
170 
171 	/**
172 	 * Kernel-assigned global name for this object
173 	 *
174 	 * List contains both flink-named and prime-fd'd objects
175 	 */
176 	unsigned int global_name;
177 
178 	UT_hash_handle handle_hh;
179 	UT_hash_handle name_hh;
180 
181 	/**
182 	 * Index of the buffer within the validation list while preparing a
183 	 * batchbuffer execution.
184 	 */
185 	int validate_index;
186 
187 	/**
188 	 * Current tiling mode
189 	 */
190 	uint32_t tiling_mode;
191 	uint32_t swizzle_mode;
192 	unsigned long stride;
193 
194 	unsigned long kflags;
195 
196 	time_t free_time;
197 
198 	/** Array passed to the DRM containing relocation information. */
199 	struct drm_i915_gem_relocation_entry *relocs;
200 	/**
201 	 * Array of info structs corresponding to relocs[i].target_handle etc
202 	 */
203 	drm_intel_reloc_target *reloc_target_info;
204 	/** Number of entries in relocs */
205 	int reloc_count;
206 	/** Array of BOs that are referenced by this buffer and will be softpinned */
207 	drm_intel_bo **softpin_target;
208 	/** Number softpinned BOs that are referenced by this buffer */
209 	int softpin_target_count;
210 	/** Maximum amount of softpinned BOs that are referenced by this buffer */
211 	int softpin_target_size;
212 
213 	/** Mapped address for the buffer, saved across map/unmap cycles */
214 	void *mem_virtual;
215 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
216 	void *gtt_virtual;
217 	/** WC CPU address for the buffer, saved across map/unmap cycles */
218 	void *wc_virtual;
219 	/**
220 	 * Virtual address of the buffer allocated by user, used for userptr
221 	 * objects only.
222 	 */
223 	void *user_virtual;
224 	int map_count;
225 	drmMMListHead vma_list;
226 
227 	/** BO cache list */
228 	drmMMListHead head;
229 
230 	/**
231 	 * Boolean of whether this BO and its children have been included in
232 	 * the current drm_intel_bufmgr_check_aperture_space() total.
233 	 */
234 	bool included_in_check_aperture;
235 
236 	/**
237 	 * Boolean of whether this buffer has been used as a relocation
238 	 * target and had its size accounted for, and thus can't have any
239 	 * further relocations added to it.
240 	 */
241 	bool used_as_reloc_target;
242 
243 	/**
244 	 * Boolean of whether we have encountered an error whilst building the relocation tree.
245 	 */
246 	bool has_error;
247 
248 	/**
249 	 * Boolean of whether this buffer can be re-used
250 	 */
251 	bool reusable;
252 
253 	/**
254 	 * Boolean of whether the GPU is definitely not accessing the buffer.
255 	 *
256 	 * This is only valid when reusable, since non-reusable
257 	 * buffers are those that have been shared with other
258 	 * processes, so we don't know their state.
259 	 */
260 	bool idle;
261 
262 	/**
263 	 * Boolean of whether this buffer was allocated with userptr
264 	 */
265 	bool is_userptr;
266 
267 	/**
268 	 * Size in bytes of this buffer and its relocation descendants.
269 	 *
270 	 * Used to avoid costly tree walking in
271 	 * drm_intel_bufmgr_check_aperture in the common case.
272 	 */
273 	int reloc_tree_size;
274 
275 	/**
276 	 * Number of potential fence registers required by this buffer and its
277 	 * relocations.
278 	 */
279 	int reloc_tree_fences;
280 
281 	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
282 	bool mapped_cpu_write;
283 };
284 
285 static unsigned int
286 drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
287 
288 static unsigned int
289 drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
290 
291 static int
292 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
293 			    uint32_t * swizzle_mode);
294 
295 static int
296 drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
297 				     uint32_t tiling_mode,
298 				     uint32_t stride);
299 
300 static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
301 						      time_t time);
302 
303 static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
304 
305 static void drm_intel_gem_bo_free(drm_intel_bo *bo);
306 
307 static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
308 {
309         return (drm_intel_bo_gem *)bo;
310 }
311 
312 static unsigned long
313 drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
314 			   uint32_t *tiling_mode)
315 {
316 	unsigned long min_size, max_size;
317 	unsigned long i;
318 
319 	if (*tiling_mode == I915_TILING_NONE)
320 		return size;
321 
322 	/* 965+ just need multiples of page size for tiling */
323 	if (bufmgr_gem->gen >= 4)
324 		return ROUND_UP_TO(size, 4096);
325 
326 	/* Older chips need powers of two, of at least 512k or 1M */
327 	if (bufmgr_gem->gen == 3) {
328 		min_size = 1024*1024;
329 		max_size = 128*1024*1024;
330 	} else {
331 		min_size = 512*1024;
332 		max_size = 64*1024*1024;
333 	}
334 
335 	if (size > max_size) {
336 		*tiling_mode = I915_TILING_NONE;
337 		return size;
338 	}
339 
340 	/* Do we need to allocate every page for the fence? */
341 	if (bufmgr_gem->has_relaxed_fencing)
342 		return ROUND_UP_TO(size, 4096);
343 
344 	for (i = min_size; i < size; i <<= 1)
345 		;
346 
347 	return i;
348 }
349 
350 /*
351  * Round a given pitch up to the minimum required for X tiling on a
352  * given chip.  We use 512 as the minimum to allow for a later tiling
353  * change.
354  */
355 static unsigned long
356 drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
357 			    unsigned long pitch, uint32_t *tiling_mode)
358 {
359 	unsigned long tile_width;
360 	unsigned long i;
361 
362 	/* If untiled, then just align it so that we can do rendering
363 	 * to it with the 3D engine.
364 	 */
365 	if (*tiling_mode == I915_TILING_NONE)
366 		return ALIGN(pitch, 64);
367 
368 	if (*tiling_mode == I915_TILING_X
369 			|| (IS_915(bufmgr_gem->pci_device)
370 			    && *tiling_mode == I915_TILING_Y))
371 		tile_width = 512;
372 	else
373 		tile_width = 128;
374 
375 	/* 965 is flexible */
376 	if (bufmgr_gem->gen >= 4)
377 		return ROUND_UP_TO(pitch, tile_width);
378 
379 	/* The older hardware has a maximum pitch of 8192 with tiled
380 	 * surfaces, so fall back to untiled if it's too large.
381 	 */
382 	if (pitch > 8192) {
383 		*tiling_mode = I915_TILING_NONE;
384 		return ALIGN(pitch, 64);
385 	}
386 
387 	/* Pre-965 needs power of two tile width */
388 	for (i = tile_width; i < pitch; i <<= 1)
389 		;
390 
391 	return i;
392 }
393 
394 static struct drm_intel_gem_bo_bucket *
395 drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
396 				 unsigned long size)
397 {
398 	int i;
399 
400 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
401 		struct drm_intel_gem_bo_bucket *bucket =
402 		    &bufmgr_gem->cache_bucket[i];
403 		if (bucket->size >= size) {
404 			return bucket;
405 		}
406 	}
407 
408 	return NULL;
409 }
410 
411 static void
412 drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
413 {
414 	int i, j;
415 
416 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
417 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
418 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
419 
420 		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
421 			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
422 			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
423 			    bo_gem->name);
424 			continue;
425 		}
426 
427 		for (j = 0; j < bo_gem->reloc_count; j++) {
428 			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
429 			drm_intel_bo_gem *target_gem =
430 			    (drm_intel_bo_gem *) target_bo;
431 
432 			DBG("%2d: %d %s(%s)@0x%08x %08x -> "
433 			    "%d (%s)@0x%08x %08x + 0x%08x\n",
434 			    i,
435 			    bo_gem->gem_handle,
436 			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
437 			    bo_gem->name,
438 			    upper_32_bits(bo_gem->relocs[j].offset),
439 			    lower_32_bits(bo_gem->relocs[j].offset),
440 			    target_gem->gem_handle,
441 			    target_gem->name,
442 			    upper_32_bits(target_bo->offset64),
443 			    lower_32_bits(target_bo->offset64),
444 			    bo_gem->relocs[j].delta);
445 		}
446 
447 		for (j = 0; j < bo_gem->softpin_target_count; j++) {
448 			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
449 			drm_intel_bo_gem *target_gem =
450 			    (drm_intel_bo_gem *) target_bo;
451 			DBG("%2d: %d %s(%s) -> "
452 			    "%d *(%s)@0x%08x %08x\n",
453 			    i,
454 			    bo_gem->gem_handle,
455 			    bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
456 			    bo_gem->name,
457 			    target_gem->gem_handle,
458 			    target_gem->name,
459 			    upper_32_bits(target_bo->offset64),
460 			    lower_32_bits(target_bo->offset64));
461 		}
462 	}
463 }
464 
465 static inline void
466 drm_intel_gem_bo_reference(drm_intel_bo *bo)
467 {
468 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
469 
470 	atomic_inc(&bo_gem->refcount);
471 }
472 
473 /**
474  * Adds the given buffer to the list of buffers to be validated (moved into the
475  * appropriate memory type) with the next batch submission.
476  *
477  * If a buffer is validated multiple times in a batch submission, it ends up
478  * with the intersection of the memory type flags and the union of the
479  * access flags.
480  */
481 static void
482 drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
483 {
484 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
485 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
486 	int index;
487 	unsigned long flags;
488 
489 	flags = 0;
490 	if (need_fence)
491 		flags |= EXEC_OBJECT_NEEDS_FENCE;
492 
493 	if (bo_gem->validate_index != -1) {
494 		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
495 		return;
496 	}
497 
498 	/* Extend the array of validation entries as necessary. */
499 	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
500 		int new_size = bufmgr_gem->exec_size * 2;
501 
502 		if (new_size == 0)
503 			new_size = 5;
504 
505 		bufmgr_gem->exec2_objects =
506 			realloc(bufmgr_gem->exec2_objects,
507 				sizeof(*bufmgr_gem->exec2_objects) * new_size);
508 		bufmgr_gem->exec_bos =
509 			realloc(bufmgr_gem->exec_bos,
510 				sizeof(*bufmgr_gem->exec_bos) * new_size);
511 		bufmgr_gem->exec_size = new_size;
512 	}
513 
514 	index = bufmgr_gem->exec_count;
515 	bo_gem->validate_index = index;
516 	/* Fill in array entry */
517 	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
518 	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
519 	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
520 	bufmgr_gem->exec2_objects[index].alignment = bo->align;
521 	bufmgr_gem->exec2_objects[index].offset = bo->offset64;
522 	bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
523 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
524 	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
525 	bufmgr_gem->exec_bos[index] = bo;
526 	bufmgr_gem->exec_count++;
527 }
528 
529 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
530 	sizeof(uint32_t))
531 
532 static void
533 drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
534 				      drm_intel_bo_gem *bo_gem,
535 				      unsigned int alignment)
536 {
537 	unsigned int size;
538 
539 	assert(!bo_gem->used_as_reloc_target);
540 
541 	/* The older chipsets are far less flexible in terms of tiling,
542 	 * and require tiled buffers to be size-aligned in the aperture.
543 	 * This means that in the worst possible case we will need a hole
544 	 * twice as large as the object in order for it to fit into the
545 	 * aperture. Optimal packing is for wimps.
546 	 */
547 	size = bo_gem->bo.size;
548 	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
549 		unsigned int min_size;
550 
551 		if (bufmgr_gem->has_relaxed_fencing) {
552 			if (bufmgr_gem->gen == 3)
553 				min_size = 1024*1024;
554 			else
555 				min_size = 512*1024;
556 
557 			while (min_size < size)
558 				min_size *= 2;
559 		} else
560 			min_size = size;
561 
562 		/* Account for worst-case alignment. */
563 		alignment = MAX2(alignment, min_size);
564 	}
565 
566 	bo_gem->reloc_tree_size = size + alignment;
567 }
568 
569 static int
570 drm_intel_setup_reloc_list(drm_intel_bo *bo)
571 {
572 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
573 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
574 	unsigned int max_relocs = bufmgr_gem->max_relocs;
575 
576 	if (bo->size / 4 < max_relocs)
577 		max_relocs = bo->size / 4;
578 
579 	bo_gem->relocs = malloc(max_relocs *
580 				sizeof(struct drm_i915_gem_relocation_entry));
581 	bo_gem->reloc_target_info = malloc(max_relocs *
582 					   sizeof(drm_intel_reloc_target));
583 	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
584 		bo_gem->has_error = true;
585 
586 		free (bo_gem->relocs);
587 		bo_gem->relocs = NULL;
588 
589 		free (bo_gem->reloc_target_info);
590 		bo_gem->reloc_target_info = NULL;
591 
592 		return 1;
593 	}
594 
595 	return 0;
596 }
597 
598 static int
599 drm_intel_gem_bo_busy(drm_intel_bo *bo)
600 {
601 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
602 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
603 	struct drm_i915_gem_busy busy;
604 	int ret;
605 
606 	if (bo_gem->reusable && bo_gem->idle)
607 		return false;
608 
609 	memclear(busy);
610 	busy.handle = bo_gem->gem_handle;
611 
612 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
613 	if (ret == 0) {
614 		bo_gem->idle = !busy.busy;
615 		return busy.busy;
616 	} else {
617 		return false;
618 	}
619 }
620 
621 static int
622 drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
623 				  drm_intel_bo_gem *bo_gem, int state)
624 {
625 	struct drm_i915_gem_madvise madv;
626 
627 	memclear(madv);
628 	madv.handle = bo_gem->gem_handle;
629 	madv.madv = state;
630 	madv.retained = 1;
631 	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
632 
633 	return madv.retained;
634 }
635 
636 static int
637 drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
638 {
639 	return drm_intel_gem_bo_madvise_internal
640 		((drm_intel_bufmgr_gem *) bo->bufmgr,
641 		 (drm_intel_bo_gem *) bo,
642 		 madv);
643 }
644 
645 /* drop the oldest entries that have been purged by the kernel */
646 static void
647 drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
648 				    struct drm_intel_gem_bo_bucket *bucket)
649 {
650 	while (!DRMLISTEMPTY(&bucket->head)) {
651 		drm_intel_bo_gem *bo_gem;
652 
653 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
654 				      bucket->head.next, head);
655 		if (drm_intel_gem_bo_madvise_internal
656 		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
657 			break;
658 
659 		DRMLISTDEL(&bo_gem->head);
660 		drm_intel_gem_bo_free(&bo_gem->bo);
661 	}
662 }
663 
664 static drm_intel_bo *
665 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
666 				const char *name,
667 				unsigned long size,
668 				unsigned long flags,
669 				uint32_t tiling_mode,
670 				unsigned long stride,
671 				unsigned int alignment)
672 {
673 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
674 	drm_intel_bo_gem *bo_gem;
675 	unsigned int page_size = getpagesize();
676 	int ret;
677 	struct drm_intel_gem_bo_bucket *bucket;
678 	bool alloc_from_cache;
679 	unsigned long bo_size;
680 	bool for_render = false;
681 
682 	if (flags & BO_ALLOC_FOR_RENDER)
683 		for_render = true;
684 
685 	/* Round the allocated size up to a power of two number of pages. */
686 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
687 
688 	/* If we don't have caching at this size, don't actually round the
689 	 * allocation up.
690 	 */
691 	if (bucket == NULL) {
692 		bo_size = size;
693 		if (bo_size < page_size)
694 			bo_size = page_size;
695 	} else {
696 		bo_size = bucket->size;
697 	}
698 
699 	pthread_mutex_lock(&bufmgr_gem->lock);
700 	/* Get a buffer out of the cache if available */
701 retry:
702 	alloc_from_cache = false;
703 	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
704 		if (for_render) {
705 			/* Allocate new render-target BOs from the tail (MRU)
706 			 * of the list, as it will likely be hot in the GPU
707 			 * cache and in the aperture for us.
708 			 */
709 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
710 					      bucket->head.prev, head);
711 			DRMLISTDEL(&bo_gem->head);
712 			alloc_from_cache = true;
713 			bo_gem->bo.align = alignment;
714 		} else {
715 			assert(alignment == 0);
716 			/* For non-render-target BOs (where we're probably
717 			 * going to map it first thing in order to fill it
718 			 * with data), check if the last BO in the cache is
719 			 * unbusy, and only reuse in that case. Otherwise,
720 			 * allocating a new buffer is probably faster than
721 			 * waiting for the GPU to finish.
722 			 */
723 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
724 					      bucket->head.next, head);
725 			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
726 				alloc_from_cache = true;
727 				DRMLISTDEL(&bo_gem->head);
728 			}
729 		}
730 
731 		if (alloc_from_cache) {
732 			if (!drm_intel_gem_bo_madvise_internal
733 			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
734 				drm_intel_gem_bo_free(&bo_gem->bo);
735 				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
736 								    bucket);
737 				goto retry;
738 			}
739 
740 			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
741 								 tiling_mode,
742 								 stride)) {
743 				drm_intel_gem_bo_free(&bo_gem->bo);
744 				goto retry;
745 			}
746 		}
747 	}
748 
749 	if (!alloc_from_cache) {
750 		struct drm_i915_gem_create create;
751 
752 		bo_gem = calloc(1, sizeof(*bo_gem));
753 		if (!bo_gem)
754 			goto err;
755 
756 		/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
757 		   list (vma_list), so better set the list head here */
758 		DRMINITLISTHEAD(&bo_gem->vma_list);
759 
760 		bo_gem->bo.size = bo_size;
761 
762 		memclear(create);
763 		create.size = bo_size;
764 
765 		ret = drmIoctl(bufmgr_gem->fd,
766 			       DRM_IOCTL_I915_GEM_CREATE,
767 			       &create);
768 		if (ret != 0) {
769 			free(bo_gem);
770 			goto err;
771 		}
772 
773 		bo_gem->gem_handle = create.handle;
774 		HASH_ADD(handle_hh, bufmgr_gem->handle_table,
775 			 gem_handle, sizeof(bo_gem->gem_handle),
776 			 bo_gem);
777 
778 		bo_gem->bo.handle = bo_gem->gem_handle;
779 		bo_gem->bo.bufmgr = bufmgr;
780 		bo_gem->bo.align = alignment;
781 
782 		bo_gem->tiling_mode = I915_TILING_NONE;
783 		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
784 		bo_gem->stride = 0;
785 
786 		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
787 							 tiling_mode,
788 							 stride))
789 			goto err_free;
790 	}
791 
792 	bo_gem->name = name;
793 	atomic_set(&bo_gem->refcount, 1);
794 	bo_gem->validate_index = -1;
795 	bo_gem->reloc_tree_fences = 0;
796 	bo_gem->used_as_reloc_target = false;
797 	bo_gem->has_error = false;
798 	bo_gem->reusable = true;
799 
800 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
801 	pthread_mutex_unlock(&bufmgr_gem->lock);
802 
803 	DBG("bo_create: buf %d (%s) %ldb\n",
804 	    bo_gem->gem_handle, bo_gem->name, size);
805 
806 	return &bo_gem->bo;
807 
808 err_free:
809 	drm_intel_gem_bo_free(&bo_gem->bo);
810 err:
811 	pthread_mutex_unlock(&bufmgr_gem->lock);
812 	return NULL;
813 }
814 
815 static drm_intel_bo *
816 drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
817 				  const char *name,
818 				  unsigned long size,
819 				  unsigned int alignment)
820 {
821 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
822 					       BO_ALLOC_FOR_RENDER,
823 					       I915_TILING_NONE, 0,
824 					       alignment);
825 }
826 
827 static drm_intel_bo *
828 drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
829 		       const char *name,
830 		       unsigned long size,
831 		       unsigned int alignment)
832 {
833 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
834 					       I915_TILING_NONE, 0, 0);
835 }
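
/*
 * Illustrative sketch, not part of the original file: how a client would
 * typically drive the public entry points that land in the allocators
 * above.  The helper name and the 4096-byte size are arbitrary examples.
 */
static void example_alloc_and_fill(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 0);

	if (bo == NULL)
		return;

	/* CPU map, write some data, then unmap and drop the reference. */
	if (drm_intel_bo_map(bo, 1) == 0) {
		memset(bo->virtual, 0, bo->size);
		drm_intel_bo_unmap(bo);
	}

	drm_intel_bo_unreference(bo);
}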
836 
837 static drm_intel_bo *
838 drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
839 			     int x, int y, int cpp, uint32_t *tiling_mode,
840 			     unsigned long *pitch, unsigned long flags)
841 {
842 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
843 	unsigned long size, stride;
844 	uint32_t tiling;
845 
846 	do {
847 		unsigned long aligned_y, height_alignment;
848 
849 		tiling = *tiling_mode;
850 
851 		/* If we're tiled, our allocations are in 8 or 32-row blocks,
852 		 * so failure to align our height means that we won't allocate
853 		 * enough pages.
854 		 *
855 		 * If we're untiled, we still have to align to 2 rows high
856 		 * because the data port accesses 2x2 blocks even if the
857 		 * bottom row isn't to be rendered, so failure to align means
858 		 * we could walk off the end of the GTT and fault.  This is
859 		 * documented on 965, and may be the case on older chipsets
860 		 * too so we try to be careful.
861 		 */
862 		aligned_y = y;
863 		height_alignment = 2;
864 
865 		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
866 			height_alignment = 16;
867 		else if (tiling == I915_TILING_X
868 			|| (IS_915(bufmgr_gem->pci_device)
869 			    && tiling == I915_TILING_Y))
870 			height_alignment = 8;
871 		else if (tiling == I915_TILING_Y)
872 			height_alignment = 32;
873 		aligned_y = ALIGN(y, height_alignment);
874 
875 		stride = x * cpp;
876 		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
877 		size = stride * aligned_y;
878 		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
879 	} while (*tiling_mode != tiling);
880 	*pitch = stride;
881 
882 	if (tiling == I915_TILING_NONE)
883 		stride = 0;
884 
885 	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
886 					       tiling, stride, 0);
887 }
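
/*
 * Illustrative sketch, not part of the original file: callers of the tiled
 * allocator pass the tiling they would like and read back the tiling and
 * pitch actually granted, since drm_intel_gem_bo_alloc_tiled() may fall
 * back to I915_TILING_NONE.  The helper name and the 1920x1080x4 surface
 * are example values.
 */
static drm_intel_bo *example_alloc_tiled(drm_intel_bufmgr *bufmgr)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch = 0;
	drm_intel_bo *bo;

	bo = drm_intel_bo_alloc_tiled(bufmgr, "example scanout",
				      1920, 1080, 4, &tiling, &pitch, 0);
	if (bo != NULL && tiling == I915_TILING_NONE) {
		/* Hardware or pitch limits forced a linear buffer. */
	}

	return bo;
}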
888 
889 static drm_intel_bo *
890 drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
891 				const char *name,
892 				void *addr,
893 				uint32_t tiling_mode,
894 				uint32_t stride,
895 				unsigned long size,
896 				unsigned long flags)
897 {
898 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
899 	drm_intel_bo_gem *bo_gem;
900 	int ret;
901 	struct drm_i915_gem_userptr userptr;
902 
903 	/* Tiling with userptr surfaces is not supported
904 	 * on all hardware so refuse it for time being.
905 	 */
906 	if (tiling_mode != I915_TILING_NONE)
907 		return NULL;
908 
909 	bo_gem = calloc(1, sizeof(*bo_gem));
910 	if (!bo_gem)
911 		return NULL;
912 
913 	atomic_set(&bo_gem->refcount, 1);
914 	DRMINITLISTHEAD(&bo_gem->vma_list);
915 
916 	bo_gem->bo.size = size;
917 
918 	memclear(userptr);
919 	userptr.user_ptr = (__u64)((unsigned long)addr);
920 	userptr.user_size = size;
921 	userptr.flags = flags;
922 
923 	ret = drmIoctl(bufmgr_gem->fd,
924 			DRM_IOCTL_I915_GEM_USERPTR,
925 			&userptr);
926 	if (ret != 0) {
927 		DBG("bo_create_userptr: "
928 		    "ioctl failed with user ptr %p size 0x%lx, "
929 		    "user flags 0x%lx\n", addr, size, flags);
930 		free(bo_gem);
931 		return NULL;
932 	}
933 
934 	pthread_mutex_lock(&bufmgr_gem->lock);
935 
936 	bo_gem->gem_handle = userptr.handle;
937 	bo_gem->bo.handle = bo_gem->gem_handle;
938 	bo_gem->bo.bufmgr    = bufmgr;
939 	bo_gem->is_userptr   = true;
940 	bo_gem->bo.virtual   = addr;
941 	/* Save the address provided by user */
942 	bo_gem->user_virtual = addr;
943 	bo_gem->tiling_mode  = I915_TILING_NONE;
944 	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
945 	bo_gem->stride       = 0;
946 
947 	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
948 		 gem_handle, sizeof(bo_gem->gem_handle),
949 		 bo_gem);
950 
951 	bo_gem->name = name;
952 	bo_gem->validate_index = -1;
953 	bo_gem->reloc_tree_fences = 0;
954 	bo_gem->used_as_reloc_target = false;
955 	bo_gem->has_error = false;
956 	bo_gem->reusable = false;
957 
958 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
959 	pthread_mutex_unlock(&bufmgr_gem->lock);
960 
961 	DBG("bo_create_userptr: "
962 	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
963 		addr, bo_gem->gem_handle, bo_gem->name,
964 		size, stride, tiling_mode);
965 
966 	return &bo_gem->bo;
967 }
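
/*
 * Illustrative sketch, not part of the original file: userptr buffers wrap
 * page-aligned memory owned by the caller, who remains responsible for the
 * allocation's lifetime.  The helper name and 64KiB size are examples.
 */
static drm_intel_bo *example_wrap_user_memory(drm_intel_bufmgr *bufmgr)
{
	size_t sz = 64 * 1024;
	drm_intel_bo *bo;
	void *ptr;

	if (posix_memalign(&ptr, getpagesize(), sz) != 0)
		return NULL;

	/* Tiling must be I915_TILING_NONE, as enforced above. */
	bo = drm_intel_bo_alloc_userptr(bufmgr, "example userptr", ptr,
					I915_TILING_NONE, 0, sz, 0);
	if (bo == NULL)
		free(ptr);

	return bo;
}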
968 
969 static bool
970 has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
971 {
972 	int ret;
973 	void *ptr;
974 	long pgsz;
975 	struct drm_i915_gem_userptr userptr;
976 
977 	pgsz = sysconf(_SC_PAGESIZE);
978 	assert(pgsz > 0);
979 
980 	ret = posix_memalign(&ptr, pgsz, pgsz);
981 	if (ret) {
982 		DBG("Failed to get a page (%ld) for userptr detection!\n",
983 			pgsz);
984 		return false;
985 	}
986 
987 	memclear(userptr);
988 	userptr.user_ptr = (__u64)(unsigned long)ptr;
989 	userptr.user_size = pgsz;
990 
991 retry:
992 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
993 	if (ret) {
994 		if (errno == ENODEV && userptr.flags == 0) {
995 			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
996 			goto retry;
997 		}
998 		free(ptr);
999 		return false;
1000 	}
1001 
1002 	/* We don't release the userptr bo here as we want to keep the
1003 	 * kernel mm tracking alive for our lifetime. The first time we
1004 	 * create a userptr object the kernel has to install an mmu_notifier
1005 	 * which is a heavyweight operation (e.g. it requires taking all
1006 	 * mm_locks and stop_machine()).
1007 	 */
1008 
1009 	bufmgr_gem->userptr_active.ptr = ptr;
1010 	bufmgr_gem->userptr_active.handle = userptr.handle;
1011 
1012 	return true;
1013 }
1014 
1015 static drm_intel_bo *
1016 check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
1017 		       const char *name,
1018 		       void *addr,
1019 		       uint32_t tiling_mode,
1020 		       uint32_t stride,
1021 		       unsigned long size,
1022 		       unsigned long flags)
1023 {
1024 	if (has_userptr((drm_intel_bufmgr_gem *)bufmgr))
1025 		bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr;
1026 	else
1027 		bufmgr->bo_alloc_userptr = NULL;
1028 
1029 	return drm_intel_bo_alloc_userptr(bufmgr, name, addr,
1030 					  tiling_mode, stride, size, flags);
1031 }
1032 
1033 static int get_tiling_mode(drm_intel_bufmgr_gem *bufmgr_gem,
1034 			   uint32_t gem_handle,
1035 			   uint32_t *tiling_mode,
1036 			   uint32_t *swizzle_mode)
1037 {
1038 	struct drm_i915_gem_get_tiling get_tiling = {
1039 		.handle = gem_handle,
1040 	};
1041 	int ret;
1042 
1043 	ret = drmIoctl(bufmgr_gem->fd,
1044 		       DRM_IOCTL_I915_GEM_GET_TILING,
1045 		       &get_tiling);
1046 	if (ret != 0 && errno != EOPNOTSUPP)
1047 		return ret;
1048 
1049 	*tiling_mode = get_tiling.tiling_mode;
1050 	*swizzle_mode = get_tiling.swizzle_mode;
1051 
1052 	return 0;
1053 }
1054 
1055 /**
1056  * Returns a drm_intel_bo wrapping the given buffer object handle.
1057  *
1058  * This can be used when one application needs to pass a buffer object
1059  * to another.
1060  */
1061 drm_public drm_intel_bo *
1062 drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
1063 				  const char *name,
1064 				  unsigned int handle)
1065 {
1066 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1067 	drm_intel_bo_gem *bo_gem;
1068 	int ret;
1069 	struct drm_gem_open open_arg;
1070 
1071 	/* At the moment most applications only have a few named bo.
1072 	 * For instance, in a DRI client only the render buffers passed
1073 	 * between X and the client are named. And since X returns the
1074 	 * alternating names for the front/back buffer a linear search
1075 	 * provides a sufficiently fast match.
1076 	 */
1077 	pthread_mutex_lock(&bufmgr_gem->lock);
1078 	HASH_FIND(name_hh, bufmgr_gem->name_table,
1079 		  &handle, sizeof(handle), bo_gem);
1080 	if (bo_gem) {
1081 		drm_intel_gem_bo_reference(&bo_gem->bo);
1082 		goto out;
1083 	}
1084 
1085 	memclear(open_arg);
1086 	open_arg.name = handle;
1087 	ret = drmIoctl(bufmgr_gem->fd,
1088 		       DRM_IOCTL_GEM_OPEN,
1089 		       &open_arg);
1090 	if (ret != 0) {
1091 		DBG("Couldn't reference %s handle 0x%08x: %s\n",
1092 		    name, handle, strerror(errno));
1093 		bo_gem = NULL;
1094 		goto out;
1095 	}
1096 	/* Now see if someone has used a prime handle to get this
1097 	 * object from the kernel before by looking through the list
1098 	 * again for a matching gem_handle.
1099 	 */
1100 	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1101 		  &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1102 	if (bo_gem) {
1103 		drm_intel_gem_bo_reference(&bo_gem->bo);
1104 		goto out;
1105 	}
1106 
1107 	bo_gem = calloc(1, sizeof(*bo_gem));
1108 	if (!bo_gem)
1109 		goto out;
1110 
1111 	atomic_set(&bo_gem->refcount, 1);
1112 	DRMINITLISTHEAD(&bo_gem->vma_list);
1113 
1114 	bo_gem->bo.size = open_arg.size;
1115 	bo_gem->bo.offset = 0;
1116 	bo_gem->bo.offset64 = 0;
1117 	bo_gem->bo.virtual = NULL;
1118 	bo_gem->bo.bufmgr = bufmgr;
1119 	bo_gem->name = name;
1120 	bo_gem->validate_index = -1;
1121 	bo_gem->gem_handle = open_arg.handle;
1122 	bo_gem->bo.handle = open_arg.handle;
1123 	bo_gem->global_name = handle;
1124 	bo_gem->reusable = false;
1125 
1126 	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1127 		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1128 	HASH_ADD(name_hh, bufmgr_gem->name_table,
1129 		 global_name, sizeof(bo_gem->global_name), bo_gem);
1130 
1131 	ret = get_tiling_mode(bufmgr_gem, bo_gem->gem_handle,
1132 			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
1133 	if (ret != 0)
1134 		goto err_unref;
1135 
1136 	/* XXX stride is unknown */
1137 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1138 	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1139 
1140 out:
1141 	pthread_mutex_unlock(&bufmgr_gem->lock);
1142 	return &bo_gem->bo;
1143 
1144 err_unref:
1145 	drm_intel_gem_bo_free(&bo_gem->bo);
1146 	pthread_mutex_unlock(&bufmgr_gem->lock);
1147 	return NULL;
1148 }
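
/*
 * Illustrative sketch, not part of the original file: flink-based sharing.
 * The exporting process publishes a global name with drm_intel_bo_flink()
 * and the importing process turns that name back into a local bo with
 * drm_intel_bo_gem_create_from_name().  Carrying the name between the two
 * processes (X protocol, socket, ...) is left to the caller; the helper
 * names are examples.
 */
static int example_export_name(drm_intel_bo *bo, uint32_t *global_name)
{
	return drm_intel_bo_flink(bo, global_name);
}

static drm_intel_bo *example_import_by_name(drm_intel_bufmgr *bufmgr,
					    uint32_t global_name)
{
	return drm_intel_bo_gem_create_from_name(bufmgr, "imported",
						 global_name);
}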
1149 
1150 static void
1151 drm_intel_gem_bo_free(drm_intel_bo *bo)
1152 {
1153 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1154 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1155 	int ret;
1156 
1157 	DRMLISTDEL(&bo_gem->vma_list);
1158 	if (bo_gem->mem_virtual) {
1159 		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1160 		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1161 		bufmgr_gem->vma_count--;
1162 	}
1163 	if (bo_gem->wc_virtual) {
1164 		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1165 		drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1166 		bufmgr_gem->vma_count--;
1167 	}
1168 	if (bo_gem->gtt_virtual) {
1169 		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1170 		bufmgr_gem->vma_count--;
1171 	}
1172 
1173 	if (bo_gem->global_name)
1174 		HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1175 	HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1176 
1177 	/* Close this object */
1178 	ret = drmCloseBufferHandle(bufmgr_gem->fd, bo_gem->gem_handle);
1179 	if (ret != 0) {
1180 		DBG("drmCloseBufferHandle %d failed (%s): %s\n",
1181 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1182 	}
1183 	free(bo);
1184 }
1185 
1186 static void
1187 drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1188 {
1189 #if HAVE_VALGRIND
1190 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1191 
1192 	if (bo_gem->mem_virtual)
1193 		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1194 
1195 	if (bo_gem->wc_virtual)
1196 		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1197 
1198 	if (bo_gem->gtt_virtual)
1199 		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1200 #endif
1201 }
1202 
1203 /** Frees all cached buffers significantly older than @time. */
1204 static void
1205 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1206 {
1207 	int i;
1208 
1209 	if (bufmgr_gem->time == time)
1210 		return;
1211 
1212 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1213 		struct drm_intel_gem_bo_bucket *bucket =
1214 		    &bufmgr_gem->cache_bucket[i];
1215 
1216 		while (!DRMLISTEMPTY(&bucket->head)) {
1217 			drm_intel_bo_gem *bo_gem;
1218 
1219 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1220 					      bucket->head.next, head);
1221 			if (time - bo_gem->free_time <= 1)
1222 				break;
1223 
1224 			DRMLISTDEL(&bo_gem->head);
1225 
1226 			drm_intel_gem_bo_free(&bo_gem->bo);
1227 		}
1228 	}
1229 
1230 	bufmgr_gem->time = time;
1231 }
1232 
1233 static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1234 {
1235 	int limit;
1236 
1237 	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1238 	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1239 
1240 	if (bufmgr_gem->vma_max < 0)
1241 		return;
1242 
1243 	/* We may need to evict a few entries in order to create new mmaps */
1244 	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1245 	if (limit < 0)
1246 		limit = 0;
1247 
1248 	while (bufmgr_gem->vma_count > limit) {
1249 		drm_intel_bo_gem *bo_gem;
1250 
1251 		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1252 				      bufmgr_gem->vma_cache.next,
1253 				      vma_list);
1254 		assert(bo_gem->map_count == 0);
1255 		DRMLISTDELINIT(&bo_gem->vma_list);
1256 
1257 		if (bo_gem->mem_virtual) {
1258 			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1259 			bo_gem->mem_virtual = NULL;
1260 			bufmgr_gem->vma_count--;
1261 		}
1262 		if (bo_gem->wc_virtual) {
1263 			drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1264 			bo_gem->wc_virtual = NULL;
1265 			bufmgr_gem->vma_count--;
1266 		}
1267 		if (bo_gem->gtt_virtual) {
1268 			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1269 			bo_gem->gtt_virtual = NULL;
1270 			bufmgr_gem->vma_count--;
1271 		}
1272 	}
1273 }
1274 
1275 static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1276 				       drm_intel_bo_gem *bo_gem)
1277 {
1278 	bufmgr_gem->vma_open--;
1279 	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1280 	if (bo_gem->mem_virtual)
1281 		bufmgr_gem->vma_count++;
1282 	if (bo_gem->wc_virtual)
1283 		bufmgr_gem->vma_count++;
1284 	if (bo_gem->gtt_virtual)
1285 		bufmgr_gem->vma_count++;
1286 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1287 }
1288 
1289 static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1290 				      drm_intel_bo_gem *bo_gem)
1291 {
1292 	bufmgr_gem->vma_open++;
1293 	DRMLISTDEL(&bo_gem->vma_list);
1294 	if (bo_gem->mem_virtual)
1295 		bufmgr_gem->vma_count--;
1296 	if (bo_gem->wc_virtual)
1297 		bufmgr_gem->vma_count--;
1298 	if (bo_gem->gtt_virtual)
1299 		bufmgr_gem->vma_count--;
1300 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1301 }
1302 
1303 static void
1304 drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1305 {
1306 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1307 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1308 	struct drm_intel_gem_bo_bucket *bucket;
1309 	int i;
1310 
1311 	/* Unreference all the target buffers */
1312 	for (i = 0; i < bo_gem->reloc_count; i++) {
1313 		if (bo_gem->reloc_target_info[i].bo != bo) {
1314 			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1315 								  reloc_target_info[i].bo,
1316 								  time);
1317 		}
1318 	}
1319 	for (i = 0; i < bo_gem->softpin_target_count; i++)
1320 		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1321 								  time);
1322 	bo_gem->kflags = 0;
1323 	bo_gem->reloc_count = 0;
1324 	bo_gem->used_as_reloc_target = false;
1325 	bo_gem->softpin_target_count = 0;
1326 
1327 	DBG("bo_unreference final: %d (%s)\n",
1328 	    bo_gem->gem_handle, bo_gem->name);
1329 
1330 	/* release memory associated with this object */
1331 	if (bo_gem->reloc_target_info) {
1332 		free(bo_gem->reloc_target_info);
1333 		bo_gem->reloc_target_info = NULL;
1334 	}
1335 	if (bo_gem->relocs) {
1336 		free(bo_gem->relocs);
1337 		bo_gem->relocs = NULL;
1338 	}
1339 	if (bo_gem->softpin_target) {
1340 		free(bo_gem->softpin_target);
1341 		bo_gem->softpin_target = NULL;
1342 		bo_gem->softpin_target_size = 0;
1343 	}
1344 
1345 	/* Clear any left-over mappings */
1346 	if (bo_gem->map_count) {
1347 		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1348 		bo_gem->map_count = 0;
1349 		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1350 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1351 	}
1352 
1353 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1354 	/* Put the buffer into our internal cache for reuse if we can. */
1355 	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1356 	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1357 					      I915_MADV_DONTNEED)) {
1358 		bo_gem->free_time = time;
1359 
1360 		bo_gem->name = NULL;
1361 		bo_gem->validate_index = -1;
1362 
1363 		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1364 	} else {
1365 		drm_intel_gem_bo_free(bo);
1366 	}
1367 }
1368 
1369 static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1370 						      time_t time)
1371 {
1372 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1373 
1374 	assert(atomic_read(&bo_gem->refcount) > 0);
1375 	if (atomic_dec_and_test(&bo_gem->refcount))
1376 		drm_intel_gem_bo_unreference_final(bo, time);
1377 }
1378 
1379 static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1380 {
1381 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1382 
1383 	assert(atomic_read(&bo_gem->refcount) > 0);
1384 
1385 	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1386 		drm_intel_bufmgr_gem *bufmgr_gem =
1387 		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1388 		struct timespec time;
1389 
1390 		clock_gettime(CLOCK_MONOTONIC, &time);
1391 
1392 		pthread_mutex_lock(&bufmgr_gem->lock);
1393 
1394 		if (atomic_dec_and_test(&bo_gem->refcount)) {
1395 			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1396 			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1397 		}
1398 
1399 		pthread_mutex_unlock(&bufmgr_gem->lock);
1400 	}
1401 }
1402 
1403 static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1404 {
1405 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1406 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1407 	struct drm_i915_gem_set_domain set_domain;
1408 	int ret;
1409 
1410 	if (bo_gem->is_userptr) {
1411 		/* Return the same user ptr */
1412 		bo->virtual = bo_gem->user_virtual;
1413 		return 0;
1414 	}
1415 
1416 	pthread_mutex_lock(&bufmgr_gem->lock);
1417 
1418 	if (bo_gem->map_count++ == 0)
1419 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1420 
1421 	if (!bo_gem->mem_virtual) {
1422 		struct drm_i915_gem_mmap mmap_arg;
1423 
1424 		DBG("bo_map: %d (%s), map_count=%d\n",
1425 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1426 
1427 		memclear(mmap_arg);
1428 		mmap_arg.handle = bo_gem->gem_handle;
1429 		mmap_arg.size = bo->size;
1430 		ret = drmIoctl(bufmgr_gem->fd,
1431 			       DRM_IOCTL_I915_GEM_MMAP,
1432 			       &mmap_arg);
1433 		if (ret != 0) {
1434 			ret = -errno;
1435 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1436 			    __FILE__, __LINE__, bo_gem->gem_handle,
1437 			    bo_gem->name, strerror(errno));
1438 			if (--bo_gem->map_count == 0)
1439 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1440 			pthread_mutex_unlock(&bufmgr_gem->lock);
1441 			return ret;
1442 		}
1443 		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1444 		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1445 	}
1446 	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1447 	    bo_gem->mem_virtual);
1448 	bo->virtual = bo_gem->mem_virtual;
1449 
1450 	memclear(set_domain);
1451 	set_domain.handle = bo_gem->gem_handle;
1452 	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1453 	if (write_enable)
1454 		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1455 	else
1456 		set_domain.write_domain = 0;
1457 	ret = drmIoctl(bufmgr_gem->fd,
1458 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1459 		       &set_domain);
1460 	if (ret != 0) {
1461 		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1462 		    __FILE__, __LINE__, bo_gem->gem_handle,
1463 		    strerror(errno));
1464 	}
1465 
1466 	if (write_enable)
1467 		bo_gem->mapped_cpu_write = true;
1468 
1469 	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1470 	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1471 	pthread_mutex_unlock(&bufmgr_gem->lock);
1472 
1473 	return 0;
1474 }
1475 
1476 static int
1477 map_gtt(drm_intel_bo *bo)
1478 {
1479 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1480 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1481 	int ret;
1482 
1483 	if (bo_gem->is_userptr)
1484 		return -EINVAL;
1485 
1486 	if (bo_gem->map_count++ == 0)
1487 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1488 
1489 	/* Get a mapping of the buffer if we haven't before. */
1490 	if (bo_gem->gtt_virtual == NULL) {
1491 		struct drm_i915_gem_mmap_gtt mmap_arg;
1492 
1493 		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1494 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1495 
1496 		memclear(mmap_arg);
1497 		mmap_arg.handle = bo_gem->gem_handle;
1498 
1499 		/* Get the fake offset back... */
1500 		ret = drmIoctl(bufmgr_gem->fd,
1501 			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1502 			       &mmap_arg);
1503 		if (ret != 0) {
1504 			ret = -errno;
1505 			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1506 			    __FILE__, __LINE__,
1507 			    bo_gem->gem_handle, bo_gem->name,
1508 			    strerror(errno));
1509 			if (--bo_gem->map_count == 0)
1510 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1511 			return ret;
1512 		}
1513 
1514 		/* and mmap it */
1515 		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1516 					       MAP_SHARED, bufmgr_gem->fd,
1517 					       mmap_arg.offset);
1518 		if (bo_gem->gtt_virtual == MAP_FAILED) {
1519 			bo_gem->gtt_virtual = NULL;
1520 			ret = -errno;
1521 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1522 			    __FILE__, __LINE__,
1523 			    bo_gem->gem_handle, bo_gem->name,
1524 			    strerror(errno));
1525 			if (--bo_gem->map_count == 0)
1526 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1527 			return ret;
1528 		}
1529 	}
1530 
1531 	bo->virtual = bo_gem->gtt_virtual;
1532 
1533 	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1534 	    bo_gem->gtt_virtual);
1535 
1536 	return 0;
1537 }
1538 
1539 drm_public int
1540 drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1541 {
1542 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1543 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1544 	struct drm_i915_gem_set_domain set_domain;
1545 	int ret;
1546 
1547 	pthread_mutex_lock(&bufmgr_gem->lock);
1548 
1549 	ret = map_gtt(bo);
1550 	if (ret) {
1551 		pthread_mutex_unlock(&bufmgr_gem->lock);
1552 		return ret;
1553 	}
1554 
1555 	/* Now move it to the GTT domain so that the GPU and CPU
1556 	 * caches are flushed and the GPU isn't actively using the
1557 	 * buffer.
1558 	 *
1559 	 * The pagefault handler does this domain change for us when
1560 	 * it has unbound the BO from the GTT, but it's up to us to
1561 	 * tell it when we're about to use things if we had done
1562 	 * rendering and it still happens to be bound to the GTT.
1563 	 */
1564 	memclear(set_domain);
1565 	set_domain.handle = bo_gem->gem_handle;
1566 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1567 	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1568 	ret = drmIoctl(bufmgr_gem->fd,
1569 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1570 		       &set_domain);
1571 	if (ret != 0) {
1572 		DBG("%s:%d: Error setting domain %d: %s\n",
1573 		    __FILE__, __LINE__, bo_gem->gem_handle,
1574 		    strerror(errno));
1575 	}
1576 
1577 	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1578 	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1579 	pthread_mutex_unlock(&bufmgr_gem->lock);
1580 
1581 	return 0;
1582 }
1583 
1584 /**
1585  * Performs a mapping of the buffer object like the normal GTT
1586  * mapping, but avoids waiting for the GPU to be done reading from or
1587  * rendering to the buffer.
1588  *
1589  * This is used in the implementation of GL_ARB_map_buffer_range: The
1590  * user asks to create a buffer, then does a mapping, fills some
1591  * space, runs a drawing command, then asks to map it again without
1592  * synchronizing because it guarantees that it won't write over the
1593  * data that the GPU is busy using (or, more specifically, that if it
1594  * does write over the data, it acknowledges that rendering is
1595  * undefined).
1596  */
1597 
1598 drm_public int
1599 drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1600 {
1601 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1602 #if HAVE_VALGRIND
1603 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1604 #endif
1605 	int ret;
1606 
1607 	/* If the CPU cache isn't coherent with the GTT, then use a
1608 	 * regular synchronized mapping.  The problem is that we don't
1609 	 * track where the buffer was last used on the CPU side in
1610 	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1611 	 * we would potentially corrupt the buffer even when the user
1612 	 * does reasonable things.
1613 	 */
1614 	if (!bufmgr_gem->has_llc)
1615 		return drm_intel_gem_bo_map_gtt(bo);
1616 
1617 	pthread_mutex_lock(&bufmgr_gem->lock);
1618 
1619 	ret = map_gtt(bo);
1620 	if (ret == 0) {
1621 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1622 		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1623 	}
1624 
1625 	pthread_mutex_unlock(&bufmgr_gem->lock);
1626 
1627 	return ret;
1628 }
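
/*
 * Illustrative sketch, not part of the original file: the
 * GL_ARB_map_buffer_range-style pattern described above.  The caller
 * promises not to touch the range the GPU is still using, so the map can
 * skip the domain-change stall.  The helper name and parameters are
 * examples.
 */
static int example_append_without_stall(drm_intel_bo *bo,
					unsigned long offset,
					const void *data, size_t len)
{
	int ret = drm_intel_gem_bo_map_unsynchronized(bo);

	if (ret != 0)
		return ret;

	memcpy((char *)bo->virtual + offset, data, len);

	return drm_intel_gem_bo_unmap_gtt(bo);
}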
1629 
1630 static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1631 {
1632 	drm_intel_bufmgr_gem *bufmgr_gem;
1633 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1634 	int ret = 0;
1635 
1636 	if (bo == NULL)
1637 		return 0;
1638 
1639 	if (bo_gem->is_userptr)
1640 		return 0;
1641 
1642 	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1643 
1644 	pthread_mutex_lock(&bufmgr_gem->lock);
1645 
1646 	if (bo_gem->map_count <= 0) {
1647 		DBG("attempted to unmap an unmapped bo\n");
1648 		pthread_mutex_unlock(&bufmgr_gem->lock);
1649 		/* Preserve the old behaviour of just treating this as a
1650 		 * no-op rather than reporting the error.
1651 		 */
1652 		return 0;
1653 	}
1654 
1655 	if (bo_gem->mapped_cpu_write) {
1656 		struct drm_i915_gem_sw_finish sw_finish;
1657 
1658 		/* Cause a flush to happen if the buffer's pinned for
1659 		 * scanout, so the results show up in a timely manner.
1660 		 * Unlike GTT set domains, this only does work if the
1661 		 * buffer should be scanout-related.
1662 		 */
1663 		memclear(sw_finish);
1664 		sw_finish.handle = bo_gem->gem_handle;
1665 		ret = drmIoctl(bufmgr_gem->fd,
1666 			       DRM_IOCTL_I915_GEM_SW_FINISH,
1667 			       &sw_finish);
1668 		ret = ret == -1 ? -errno : 0;
1669 
1670 		bo_gem->mapped_cpu_write = false;
1671 	}
1672 
1673 	/* We need to unmap after every invocation as we cannot track
1674 	 * an open vma for every bo as that will exhaust the system
1675 	 * limits and cause later failures.
1676 	 */
1677 	if (--bo_gem->map_count == 0) {
1678 		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1679 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1680 		bo->virtual = NULL;
1681 	}
1682 	pthread_mutex_unlock(&bufmgr_gem->lock);
1683 
1684 	return ret;
1685 }
1686 
1687 drm_public int
1688 drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1689 {
1690 	return drm_intel_gem_bo_unmap(bo);
1691 }
1692 
1693 static bool is_cache_coherent(drm_intel_bo *bo)
1694 {
1695 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1696 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1697 	struct drm_i915_gem_caching arg = {};
1698 
1699 	arg.handle = bo_gem->gem_handle;
1700 	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
1701 		assert(false);
1702 	return arg.caching != I915_CACHING_NONE;
1703 }
1704 
1705 static void set_domain(drm_intel_bo *bo, uint32_t read, uint32_t write)
1706 {
1707 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1708 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1709 	struct drm_i915_gem_set_domain arg = {};
1710 
1711 	arg.handle = bo_gem->gem_handle;
1712 	arg.read_domains = read;
1713 	arg.write_domain = write;
1714 	if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
1715 		assert(false);
1716 }
1717 
1718 static int mmap_write(drm_intel_bo *bo, unsigned long offset,
1719 		      unsigned long length, const void *buf)
1720 {
1721 	void *map = NULL;
1722 
1723 	if (!length)
1724 		return 0;
1725 
1726 	if (is_cache_coherent(bo)) {
1727 		map = drm_intel_gem_bo_map__cpu(bo);
1728 		if (map)
1729 			set_domain(bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
1730 	}
1731 	if (!map) {
1732 		map = drm_intel_gem_bo_map__wc(bo);
1733 		if (map)
1734 			set_domain(bo, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
1735 	}
1736 
1737 	assert(map);
1738 	memcpy((char *)map + offset, buf, length);
1739 	drm_intel_gem_bo_unmap(bo);
1740 	return 0;
1741 }
1742 
1743 static int mmap_read(drm_intel_bo *bo, unsigned long offset,
1744 		      unsigned long length, void *buf)
1745 {
1746 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1747 	void *map = NULL;
1748 
1749 	if (!length)
1750 		return 0;
1751 
1752 	if (bufmgr_gem->has_llc || is_cache_coherent(bo)) {
1753 		map = drm_intel_gem_bo_map__cpu(bo);
1754 		if (map)
1755 			set_domain(bo, I915_GEM_DOMAIN_CPU, 0);
1756 	}
1757 	if (!map) {
1758 		map = drm_intel_gem_bo_map__wc(bo);
1759 		if (map)
1760 			set_domain(bo, I915_GEM_DOMAIN_WC, 0);
1761 	}
1762 
1763 	assert(map);
1764 	memcpy(buf, (char *)map + offset, length);
1765 	drm_intel_gem_bo_unmap(bo);
1766 	return 0;
1767 }
1768 
1769 static int
1770 drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1771 			 unsigned long size, const void *data)
1772 {
1773 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1774 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1775 	struct drm_i915_gem_pwrite pwrite;
1776 	int ret;
1777 
1778 	if (bo_gem->is_userptr)
1779 		return -EINVAL;
1780 
1781 	memclear(pwrite);
1782 	pwrite.handle = bo_gem->gem_handle;
1783 	pwrite.offset = offset;
1784 	pwrite.size = size;
1785 	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1786 	ret = drmIoctl(bufmgr_gem->fd,
1787 		       DRM_IOCTL_I915_GEM_PWRITE,
1788 		       &pwrite);
1789 	if (ret)
1790 		ret = -errno;
1791 
1792 	if (ret != 0 && ret != -EOPNOTSUPP) {
1793 		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1794 		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1795 		    (int)size, strerror(errno));
1796 		return ret;
1797 	}
1798 
1799 	if (ret == -EOPNOTSUPP)
1800 		mmap_write(bo, offset, size, data);
1801 
1802 	return 0;
1803 }
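
/* Illustrative sketch: callers reach this through the public
 * drm_intel_bo_subdata() wrapper, e.g. to upload a small constant buffer.
 * "data" and "len" are hypothetical caller-side names, with len <= 4096 here.
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "constants", 4096, 4096);
 *	drm_intel_bo_subdata(bo, 0, len, data);
 */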
1804 
1805 static int
1806 drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1807 {
1808 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1809 	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1810 	int ret;
1811 
1812 	memclear(get_pipe_from_crtc_id);
1813 	get_pipe_from_crtc_id.crtc_id = crtc_id;
1814 	ret = drmIoctl(bufmgr_gem->fd,
1815 		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1816 		       &get_pipe_from_crtc_id);
1817 	if (ret != 0) {
1818 		/* We return -1 here to signal that we don't
1819 		 * know which pipe is associated with this crtc.
1820 		 * This lets the caller know that this information
1821 		 * isn't available; using the wrong pipe for
1822 		 * vblank waiting can cause the chipset to lock up.
1823 		 */
1824 		return -1;
1825 	}
1826 
1827 	return get_pipe_from_crtc_id.pipe;
1828 }
1829 
1830 static int
1831 drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1832 			     unsigned long size, void *data)
1833 {
1834 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1835 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1836 	struct drm_i915_gem_pread pread;
1837 	int ret;
1838 
1839 	if (bo_gem->is_userptr)
1840 		return -EINVAL;
1841 
1842 	memclear(pread);
1843 	pread.handle = bo_gem->gem_handle;
1844 	pread.offset = offset;
1845 	pread.size = size;
1846 	pread.data_ptr = (uint64_t) (uintptr_t) data;
1847 	ret = drmIoctl(bufmgr_gem->fd,
1848 		       DRM_IOCTL_I915_GEM_PREAD,
1849 		       &pread);
1850 	if (ret)
1851 		ret = -errno;
1852 
1853 	if (ret != 0 && ret != -EOPNOTSUPP) {
1854 		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1855 		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1856 		    (int)size, strerror(errno));
1857 		return ret;
1858 	}
1859 
1860 	if (ret == -EOPNOTSUPP)
1861 		mmap_read(bo, offset, size, data);
1862 
1863 	return 0;
1864 }
1865 
1866 /** Waits for all GPU rendering with the object to have completed. */
1867 static void
1868 drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1869 {
1870 	drm_intel_gem_bo_start_gtt_access(bo, 1);
1871 }
1872 
1873 /**
1874  * Waits on a BO for the given amount of time.
1875  *
1876  * @bo: buffer object to wait for
1877  * @timeout_ns: amount of time to wait in nanoseconds.
1878  *   If value is less than 0, an infinite wait will occur.
1879  *
1880  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1881  * object completed within the allotted time. Otherwise, a negative return
1882  * value describes the error. Of particular interest is -ETIME, returned when
1883  * the wait failed to yield the desired result.
1884  *
1885  * Similar to drm_intel_gem_bo_wait_rendering(), except that a timeout parameter
1886  * allows the operation to give up after a certain amount of time. Another subtle
1887  * difference is the locking semantics: this variant does not hold the lock for
1888  * the duration of the wait, which makes the wait subject to a larger userspace
1889  * race window.
1890  *
1891  * The implementation shall wait until the object is no longer actively
1892  * referenced within a batch buffer at the time of the call. The wait does not
1893  * guard against the buffer being re-issued by another thread, or via a flinked
1894  * handle. Userspace must make sure this race does not occur if such precision
1895  * is important.
1896  *
1897  * Note that some kernels have broken the promise of an infinite wait for
1898  * negative values; upgrade to the latest stable kernel if this is the case.
1899  */
1900 drm_public int
1901 drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1902 {
1903 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1904 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1905 	struct drm_i915_gem_wait wait;
1906 	int ret;
1907 
1908 	if (!bufmgr_gem->has_wait_timeout) {
1909 		DBG("%s:%d: Timed wait is not supported. Falling back to "
1910 		    "infinite wait\n", __FILE__, __LINE__);
1911 		if (timeout_ns) {
1912 			drm_intel_gem_bo_wait_rendering(bo);
1913 			return 0;
1914 		} else {
1915 			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1916 		}
1917 	}
1918 
1919 	memclear(wait);
1920 	wait.bo_handle = bo_gem->gem_handle;
1921 	wait.timeout_ns = timeout_ns;
1922 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1923 	if (ret == -1)
1924 		return -errno;
1925 
1926 	return ret;
1927 }
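
/* A minimal usage sketch (illustrative only, not part of the library): wait
 * up to one millisecond for outstanding rendering on "bo", and fall back to
 * an unbounded wait if the request times out.
 *
 *	int ret = drm_intel_gem_bo_wait(bo, 1000000);	// 1 ms, in ns
 *	if (ret == -ETIME) {
 *		// Still busy after 1 ms; block until idle.
 *		drm_intel_gem_bo_wait(bo, -1);
 *	} else if (ret < 0) {
 *		fprintf(stderr, "wait failed: %s\n", strerror(-ret));
 *	}
 */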
1928 
1929 /**
1930  * Sets the object to the GTT read and possibly write domain, used by the X
1931  * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1932  *
1933  * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1934  * can do tiled pixmaps this way.
1935  */
1936 drm_public void
1937 drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1938 {
1939 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1940 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1941 	struct drm_i915_gem_set_domain set_domain;
1942 	int ret;
1943 
1944 	memclear(set_domain);
1945 	set_domain.handle = bo_gem->gem_handle;
1946 	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1947 	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1948 	ret = drmIoctl(bufmgr_gem->fd,
1949 		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1950 		       &set_domain);
1951 	if (ret != 0) {
1952 		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1953 		    __FILE__, __LINE__, bo_gem->gem_handle,
1954 		    set_domain.read_domains, set_domain.write_domain,
1955 		    strerror(errno));
1956 	}
1957 }
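
/* Illustrative sketch of the legacy path described above: "pixmap_ptr" is a
 * hypothetical pointer into a GTT mapping that the caller (e.g. the X 2D
 * driver) manages itself, with fencing handled manually.
 *
 *	drm_intel_gem_bo_start_gtt_access(bo, 1);	// about to write
 *	memcpy(pixmap_ptr, src, len);			// CPU writes through the GTT
 */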
1958 
1959 static void
1960 drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1961 {
1962 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1963 	int i, ret;
1964 
1965 	free(bufmgr_gem->exec2_objects);
1966 	free(bufmgr_gem->exec_bos);
1967 
1968 	pthread_mutex_destroy(&bufmgr_gem->lock);
1969 
1970 	/* Free any cached buffer objects we were going to reuse */
1971 	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1972 		struct drm_intel_gem_bo_bucket *bucket =
1973 		    &bufmgr_gem->cache_bucket[i];
1974 		drm_intel_bo_gem *bo_gem;
1975 
1976 		while (!DRMLISTEMPTY(&bucket->head)) {
1977 			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1978 					      bucket->head.next, head);
1979 			DRMLISTDEL(&bo_gem->head);
1980 
1981 			drm_intel_gem_bo_free(&bo_gem->bo);
1982 		}
1983 	}
1984 
1985 	/* Release userptr bo kept hanging around for optimisation. */
1986 	if (bufmgr_gem->userptr_active.ptr) {
1987 		ret = drmCloseBufferHandle(bufmgr_gem->fd,
1988 					   bufmgr_gem->userptr_active.handle);
1989 		free(bufmgr_gem->userptr_active.ptr);
1990 		if (ret)
1991 			fprintf(stderr,
1992 				"Failed to release test userptr object! (%d) "
1993 				"i915 kernel driver may not be sane!\n", errno);
1994 	}
1995 
1996 	free(bufmgr);
1997 }
1998 
1999 /**
2000  * Adds the target buffer to the validation list and adds the relocation
2001  * to the reloc_buffer's relocation list.
2002  *
2003  * The relocation entry at the given offset must already contain the
2004  * precomputed relocation value, because the kernel will optimize out
2005  * the relocation entry write when the buffer hasn't moved from the
2006  * last known offset in target_bo.
2007  */
2008 static int
2009 do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2010 		 drm_intel_bo *target_bo, uint32_t target_offset,
2011 		 uint32_t read_domains, uint32_t write_domain,
2012 		 bool need_fence)
2013 {
2014 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2015 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2016 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2017 	bool fenced_command;
2018 
2019 	if (bo_gem->has_error)
2020 		return -ENOMEM;
2021 
2022 	if (target_bo_gem->has_error) {
2023 		bo_gem->has_error = true;
2024 		return -ENOMEM;
2025 	}
2026 
2027 	/* We never use HW fences for rendering on 965+ */
2028 	if (bufmgr_gem->gen >= 4)
2029 		need_fence = false;
2030 
2031 	fenced_command = need_fence;
2032 	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
2033 		need_fence = false;
2034 
2035 	/* Create a new relocation list if needed */
2036 	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
2037 		return -ENOMEM;
2038 
2039 	/* Check overflow */
2040 	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
2041 
2042 	/* Check args */
2043 	assert(offset <= bo->size - 4);
2044 	assert((write_domain & (write_domain - 1)) == 0);
2045 
2046 	/* An object needing a fence is a tiled buffer, so it won't have
2047 	 * relocs to other buffers.
2048 	 */
2049 	if (need_fence) {
2050 		assert(target_bo_gem->reloc_count == 0);
2051 		target_bo_gem->reloc_tree_fences = 1;
2052 	}
2053 
2054 	/* Make sure that we're not adding a reloc to something whose size has
2055 	 * already been accounted for.
2056 	 */
2057 	assert(!bo_gem->used_as_reloc_target);
2058 	if (target_bo_gem != bo_gem) {
2059 		target_bo_gem->used_as_reloc_target = true;
2060 		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2061 		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2062 	}
2063 
2064 	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2065 	if (target_bo != bo)
2066 		drm_intel_gem_bo_reference(target_bo);
2067 	if (fenced_command)
2068 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2069 			DRM_INTEL_RELOC_FENCE;
2070 	else
2071 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2072 
2073 	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2074 	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2075 	bo_gem->relocs[bo_gem->reloc_count].target_handle =
2076 	    target_bo_gem->gem_handle;
2077 	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2078 	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2079 	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2080 	bo_gem->reloc_count++;
2081 
2082 	return 0;
2083 }
2084 
2085 static void
2086 drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable)
2087 {
2088 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2089 
2090 	if (enable)
2091 		bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2092 	else
2093 		bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2094 }
2095 
2096 static int
2097 drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
2098 {
2099 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2100 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2101 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2102 	if (bo_gem->has_error)
2103 		return -ENOMEM;
2104 
2105 	if (target_bo_gem->has_error) {
2106 		bo_gem->has_error = true;
2107 		return -ENOMEM;
2108 	}
2109 
2110 	if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2111 		return -EINVAL;
2112 	if (target_bo_gem == bo_gem)
2113 		return -EINVAL;
2114 
2115 	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2116 		int new_size = bo_gem->softpin_target_size * 2;
2117 		if (new_size == 0)
2118 			new_size = bufmgr_gem->max_relocs;
2119 
2120 		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2121 				sizeof(drm_intel_bo *));
2122 		if (!bo_gem->softpin_target)
2123 			return -ENOMEM;
2124 
2125 		bo_gem->softpin_target_size = new_size;
2126 	}
2127 	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2128 	drm_intel_gem_bo_reference(target_bo);
2129 	bo_gem->softpin_target_count++;
2130 
2131 	return 0;
2132 }
2133 
2134 static int
2135 drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
2136 			    drm_intel_bo *target_bo, uint32_t target_offset,
2137 			    uint32_t read_domains, uint32_t write_domain)
2138 {
2139 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2140 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
2141 
2142 	if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2143 		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
2144 	else
2145 		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2146 					read_domains, write_domain,
2147 					!bufmgr_gem->fenced_relocs);
2148 }
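
/* Illustrative sketch: from a batchbuffer writer's point of view, relocations
 * arrive here through the public drm_intel_bo_emit_reloc() wrapper.
 * "batch_bo", "target_bo" and "batch_used" are hypothetical caller-side names.
 *
 *	// Ask the kernel to patch the dword at batch_used with the final
 *	// GPU address of target_bo (delta 0), read by the render engine.
 *	drm_intel_bo_emit_reloc(batch_bo, batch_used,
 *				target_bo, 0,
 *				I915_GEM_DOMAIN_RENDER, 0);
 */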
2149 
2150 static int
2151 drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
2152 				  drm_intel_bo *target_bo,
2153 				  uint32_t target_offset,
2154 				  uint32_t read_domains, uint32_t write_domain)
2155 {
2156 	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2157 				read_domains, write_domain, true);
2158 }
2159 
2160 drm_public int
2161 drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
2162 {
2163 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2164 
2165 	return bo_gem->reloc_count;
2166 }
2167 
2168 /**
2169  * Removes existing relocation entries in the BO after "start".
2170  *
2171  * This allows a user to avoid a two-step process for state setup with
2172  * counting up all the buffer objects and doing a
2173  * drm_intel_bufmgr_check_aperture_space() before emitting any of the
2174  * relocations for the state setup.  Instead, save the state of the
2175  * batchbuffer including drm_intel_gem_bo_get_reloc_count(), emit all the
2176  * state, and then check if it still fits in the aperture.
2177  *
2178  * Any further drm_intel_bufmgr_check_aperture_space() queries
2179  * involving this buffer in the tree are undefined after this call.
2180  *
2181  * This also removes all softpinned targets being referenced by the BO.
2182  */
2183 drm_public void
2184 drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
2185 {
2186 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2187 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2188 	int i;
2189 	struct timespec time;
2190 
2191 	clock_gettime(CLOCK_MONOTONIC, &time);
2192 
2193 	assert(bo_gem->reloc_count >= start);
2194 
2195 	/* Unreference the cleared target buffers */
2196 	pthread_mutex_lock(&bufmgr_gem->lock);
2197 
2198 	for (i = start; i < bo_gem->reloc_count; i++) {
2199 		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
2200 		if (&target_bo_gem->bo != bo) {
2201 			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2202 			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2203 								  time.tv_sec);
2204 		}
2205 	}
2206 	bo_gem->reloc_count = start;
2207 
2208 	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2209 		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
2210 		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2211 	}
2212 	bo_gem->softpin_target_count = 0;
2213 
2214 	pthread_mutex_unlock(&bufmgr_gem->lock);
2215 
2216 }
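
/* Illustrative sketch of the single-pass workflow described above;
 * emit_state() stands in for hypothetical caller code that emits relocations.
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *	emit_state(batch_bo);
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *		// Doesn't fit: roll the relocations back, flush, and retry.
 *		drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *	}
 */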
2217 
2218 /**
2219  * Walk the tree of relocations rooted at BO, accumulate the list of
2220  * validations to be performed, and update the relocation buffers with
2221  * index values into the validation list.
2222  */
2223 static void
2224 drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
2225 {
2226 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2227 	int i;
2228 
2229 	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2230 		return;
2231 
2232 	for (i = 0; i < bo_gem->reloc_count; i++) {
2233 		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2234 		int need_fence;
2235 
2236 		if (target_bo == bo)
2237 			continue;
2238 
2239 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2240 
2241 		/* Continue walking the tree depth-first. */
2242 		drm_intel_gem_bo_process_reloc2(target_bo);
2243 
2244 		need_fence = (bo_gem->reloc_target_info[i].flags &
2245 			      DRM_INTEL_RELOC_FENCE);
2246 
2247 		/* Add the target to the validate list */
2248 		drm_intel_add_validate_buffer2(target_bo, need_fence);
2249 	}
2250 
2251 	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2252 		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
2253 
2254 		if (target_bo == bo)
2255 			continue;
2256 
2257 		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
2258 		drm_intel_gem_bo_process_reloc2(target_bo);
2259 		drm_intel_add_validate_buffer2(target_bo, false);
2260 	}
2261 }
2262 
2263 static void
2264 drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2265 {
2266 	int i;
2267 
2268 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2269 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2270 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2271 
2272 		/* Update the buffer offset */
2273 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2274 			/* If we see a softpinned object here, it means that the kernel
2275 			 * has relocated our object, which indicates a programming error.
2276 			 */
2277 			assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2278 			DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2279 			    bo_gem->gem_handle, bo_gem->name,
2280 			    upper_32_bits(bo->offset64),
2281 			    lower_32_bits(bo->offset64),
2282 			    upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2283 			    lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2284 			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2285 			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2286 		}
2287 	}
2288 }
2289 
2290 drm_public void
2291 drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2292 			      int x1, int y1, int width, int height,
2293 			      enum aub_dump_bmp_format format,
2294 			      int pitch, int offset)
2295 {
2296 }
2297 
2298 static int
2299 do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2300 	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2301 	 int in_fence, int *out_fence,
2302 	 unsigned int flags)
2303 {
2304 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2305 	struct drm_i915_gem_execbuffer2 execbuf;
2306 	int ret = 0;
2307 	int i;
2308 
2309 	if (to_bo_gem(bo)->has_error)
2310 		return -ENOMEM;
2311 
2312 	switch (flags & 0x7) {
2313 	default:
2314 		return -EINVAL;
2315 	case I915_EXEC_BLT:
2316 		if (!bufmgr_gem->has_blt)
2317 			return -EINVAL;
2318 		break;
2319 	case I915_EXEC_BSD:
2320 		if (!bufmgr_gem->has_bsd)
2321 			return -EINVAL;
2322 		break;
2323 	case I915_EXEC_VEBOX:
2324 		if (!bufmgr_gem->has_vebox)
2325 			return -EINVAL;
2326 		break;
2327 	case I915_EXEC_RENDER:
2328 	case I915_EXEC_DEFAULT:
2329 		break;
2330 	}
2331 
2332 	pthread_mutex_lock(&bufmgr_gem->lock);
2333 	/* Update indices and set up the validate list. */
2334 	drm_intel_gem_bo_process_reloc2(bo);
2335 
2336 	/* Add the batch buffer to the validation list.  There are no relocations
2337 	 * pointing to it.
2338 	 */
2339 	drm_intel_add_validate_buffer2(bo, 0);
2340 
2341 	memclear(execbuf);
2342 	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2343 	execbuf.buffer_count = bufmgr_gem->exec_count;
2344 	execbuf.batch_start_offset = 0;
2345 	execbuf.batch_len = used;
2346 	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2347 	execbuf.num_cliprects = num_cliprects;
2348 	execbuf.DR1 = 0;
2349 	execbuf.DR4 = DR4;
2350 	execbuf.flags = flags;
2351 	if (ctx == NULL)
2352 		i915_execbuffer2_set_context_id(execbuf, 0);
2353 	else
2354 		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2355 	execbuf.rsvd2 = 0;
2356 	if (in_fence != -1) {
2357 		execbuf.rsvd2 = in_fence;
2358 		execbuf.flags |= I915_EXEC_FENCE_IN;
2359 	}
2360 	if (out_fence != NULL) {
2361 		*out_fence = -1;
2362 		execbuf.flags |= I915_EXEC_FENCE_OUT;
2363 	}
2364 
2365 	if (bufmgr_gem->no_exec)
2366 		goto skip_execution;
2367 
2368 	ret = drmIoctl(bufmgr_gem->fd,
2369 		       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2370 		       &execbuf);
2371 	if (ret != 0) {
2372 		ret = -errno;
2373 		if (ret == -ENOSPC) {
2374 			DBG("Execbuffer fails to pin. "
2375 			    "Estimate: %u. Actual: %u. Available: %u\n",
2376 			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2377 							       bufmgr_gem->exec_count),
2378 			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2379 							      bufmgr_gem->exec_count),
2380 			    (unsigned int) bufmgr_gem->gtt_size);
2381 		}
2382 	}
2383 	drm_intel_update_buffer_offsets2(bufmgr_gem);
2384 
2385 	if (ret == 0 && out_fence != NULL)
2386 		*out_fence = execbuf.rsvd2 >> 32;
2387 
2388 skip_execution:
2389 	if (bufmgr_gem->bufmgr.debug)
2390 		drm_intel_gem_dump_validation_list(bufmgr_gem);
2391 
2392 	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2393 		drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2394 
2395 		bo_gem->idle = false;
2396 
2397 		/* Disconnect the buffer from the validate list */
2398 		bo_gem->validate_index = -1;
2399 		bufmgr_gem->exec_bos[i] = NULL;
2400 	}
2401 	bufmgr_gem->exec_count = 0;
2402 	pthread_mutex_unlock(&bufmgr_gem->lock);
2403 
2404 	return ret;
2405 }
2406 
2407 static int
2408 drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2409 		       drm_clip_rect_t *cliprects, int num_cliprects,
2410 		       int DR4)
2411 {
2412 	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2413 			-1, NULL, I915_EXEC_RENDER);
2414 }
2415 
2416 static int
2417 drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2418 			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2419 			unsigned int flags)
2420 {
2421 	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2422 			-1, NULL, flags);
2423 }
2424 
2425 drm_public int
2426 drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2427 			      int used, unsigned int flags)
2428 {
2429 	return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2430 }
2431 
2432 drm_public int
2433 drm_intel_gem_bo_fence_exec(drm_intel_bo *bo,
2434 			    drm_intel_context *ctx,
2435 			    int used,
2436 			    int in_fence,
2437 			    int *out_fence,
2438 			    unsigned int flags)
2439 {
2440 	return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2441 }
2442 
2443 static int
2444 drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2445 {
2446 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2447 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2448 	struct drm_i915_gem_pin pin;
2449 	int ret;
2450 
2451 	memclear(pin);
2452 	pin.handle = bo_gem->gem_handle;
2453 	pin.alignment = alignment;
2454 
2455 	ret = drmIoctl(bufmgr_gem->fd,
2456 		       DRM_IOCTL_I915_GEM_PIN,
2457 		       &pin);
2458 	if (ret != 0)
2459 		return -errno;
2460 
2461 	bo->offset64 = pin.offset;
2462 	bo->offset = pin.offset;
2463 	return 0;
2464 }
2465 
2466 static int
2467 drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2468 {
2469 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2470 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2471 	struct drm_i915_gem_unpin unpin;
2472 	int ret;
2473 
2474 	memclear(unpin);
2475 	unpin.handle = bo_gem->gem_handle;
2476 
2477 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2478 	if (ret != 0)
2479 		return -errno;
2480 
2481 	return 0;
2482 }
2483 
2484 static int
2485 drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2486 				     uint32_t tiling_mode,
2487 				     uint32_t stride)
2488 {
2489 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2490 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2491 	struct drm_i915_gem_set_tiling set_tiling;
2492 	int ret;
2493 
2494 	if (bo_gem->global_name == 0 &&
2495 	    tiling_mode == bo_gem->tiling_mode &&
2496 	    stride == bo_gem->stride)
2497 		return 0;
2498 
2499 	memset(&set_tiling, 0, sizeof(set_tiling));
2500 	do {
2501 		/* set_tiling is slightly broken and overwrites the
2502 		 * input on the error path, so we have to open code
2503 		 * drmIoctl.
2504 		 */
2505 		set_tiling.handle = bo_gem->gem_handle;
2506 		set_tiling.tiling_mode = tiling_mode;
2507 		set_tiling.stride = stride;
2508 
2509 		ret = ioctl(bufmgr_gem->fd,
2510 			    DRM_IOCTL_I915_GEM_SET_TILING,
2511 			    &set_tiling);
2512 	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2513 	if (ret == -1)
2514 		return -errno;
2515 
2516 	bo_gem->tiling_mode = set_tiling.tiling_mode;
2517 	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2518 	bo_gem->stride = set_tiling.stride;
2519 	return 0;
2520 }
2521 
2522 static int
2523 drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2524 			    uint32_t stride)
2525 {
2526 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2527 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2528 	int ret;
2529 
2530 	/* Tiling with userptr surfaces is not supported
2531 	 * on all hardware, so refuse it for the time being.
2532 	 */
2533 	if (bo_gem->is_userptr)
2534 		return -EINVAL;
2535 
2536 	/* Linear buffers have no stride. By ensuring that we only ever use
2537 	 * stride 0 with linear buffers, we simplify our code.
2538 	 */
2539 	if (*tiling_mode == I915_TILING_NONE)
2540 		stride = 0;
2541 
2542 	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2543 	if (ret == 0)
2544 		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2545 
2546 	*tiling_mode = bo_gem->tiling_mode;
2547 	return ret;
2548 }
2549 
2550 static int
2551 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2552 			    uint32_t * swizzle_mode)
2553 {
2554 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2555 
2556 	*tiling_mode = bo_gem->tiling_mode;
2557 	*swizzle_mode = bo_gem->swizzle_mode;
2558 	return 0;
2559 }
2560 
2561 static int
2562 drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
2563 {
2564 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2565 
2566 	bo->offset64 = offset;
2567 	bo->offset = offset;
2568 	bo_gem->kflags |= EXEC_OBJECT_PINNED;
2569 
2570 	return 0;
2571 }
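
/* Illustrative sketch: softpinning a bo at a caller-managed GPU address via
 * the public wrappers declared in intel_bufmgr.h. "MY_GTT_OFFSET" is a
 * hypothetical, page-aligned address that the caller guarantees is free.
 *
 *	drm_intel_bo_set_softpin_offset(bo, MY_GTT_OFFSET);
 *	drm_intel_bo_use_48b_address_range(bo, 1);
 *	// Relocations that target this bo are now routed to
 *	// drm_intel_gem_bo_add_softpin_target() by the emit_reloc path above.
 */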
2572 
2573 drm_public drm_intel_bo *
2574 drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2575 {
2576 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2577 	int ret;
2578 	uint32_t handle;
2579 	drm_intel_bo_gem *bo_gem;
2580 
2581 	pthread_mutex_lock(&bufmgr_gem->lock);
2582 	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2583 	if (ret) {
2584 		DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2585 		pthread_mutex_unlock(&bufmgr_gem->lock);
2586 		return NULL;
2587 	}
2588 
2589 	/*
2590 	 * See if the kernel has already returned this buffer to us. Just as
2591 	 * for named buffers, we must not create two bos pointing at the same
2592 	 * kernel object.
2593 	 */
2594 	HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2595 		  &handle, sizeof(handle), bo_gem);
2596 	if (bo_gem) {
2597 		drm_intel_gem_bo_reference(&bo_gem->bo);
2598 		goto out;
2599 	}
2600 
2601 	bo_gem = calloc(1, sizeof(*bo_gem));
2602 	if (!bo_gem)
2603 		goto out;
2604 
2605 	atomic_set(&bo_gem->refcount, 1);
2606 	DRMINITLISTHEAD(&bo_gem->vma_list);
2607 
2608 	/* Determine size of bo.  The fd-to-handle ioctl really should
2609 	 * return the size, but it doesn't.  If we have kernel 3.12 or
2610 	 * later, we can lseek on the prime fd to get the size.  Older
2611 	 * kernels will just fail, in which case we fall back to the
2612 	 * provided (estimated or guessed) size. */
2613 	ret = lseek(prime_fd, 0, SEEK_END);
2614 	if (ret != -1)
2615 		bo_gem->bo.size = ret;
2616 	else
2617 		bo_gem->bo.size = size;
2618 
2619 	bo_gem->bo.handle = handle;
2620 	bo_gem->bo.bufmgr = bufmgr;
2621 
2622 	bo_gem->gem_handle = handle;
2623 	HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2624 		 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2625 
2626 	bo_gem->name = "prime";
2627 	bo_gem->validate_index = -1;
2628 	bo_gem->reloc_tree_fences = 0;
2629 	bo_gem->used_as_reloc_target = false;
2630 	bo_gem->has_error = false;
2631 	bo_gem->reusable = false;
2632 
2633 	ret = get_tiling_mode(bufmgr_gem, handle,
2634 			      &bo_gem->tiling_mode, &bo_gem->swizzle_mode);
2635 	if (ret)
2636 		goto err;
2637 
2638 	/* XXX stride is unknown */
2639 	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2640 
2641 out:
2642 	pthread_mutex_unlock(&bufmgr_gem->lock);
2643 	return &bo_gem->bo;
2644 
2645 err:
2646 	drm_intel_gem_bo_free(&bo_gem->bo);
2647 	pthread_mutex_unlock(&bufmgr_gem->lock);
2648 	return NULL;
2649 }
2650 
2651 drm_public int
2652 drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2653 {
2654 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2655 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2656 
2657 	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2658 			       DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2659 		return -errno;
2660 
2661 	bo_gem->reusable = false;
2662 
2663 	return 0;
2664 }
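
/* Illustrative sketch of a PRIME round trip (error handling elided;
 * "bufmgr_b" is a hypothetical second buffer manager, possibly in another
 * process after passing the fd over a socket):
 *
 *	int fd;
 *	drm_intel_bo *shared;
 *
 *	drm_intel_bo_gem_export_to_prime(bo, &fd);
 *	shared = drm_intel_bo_gem_create_from_prime(bufmgr_b, fd, bo->size);
 *	close(fd);	// the import holds its own reference to the object
 */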
2665 
2666 static int
2667 drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2668 {
2669 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2670 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2671 
2672 	if (!bo_gem->global_name) {
2673 		struct drm_gem_flink flink;
2674 
2675 		memclear(flink);
2676 		flink.handle = bo_gem->gem_handle;
2677 		if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2678 			return -errno;
2679 
2680 		pthread_mutex_lock(&bufmgr_gem->lock);
2681 		if (!bo_gem->global_name) {
2682 			bo_gem->global_name = flink.name;
2683 			bo_gem->reusable = false;
2684 
2685 			HASH_ADD(name_hh, bufmgr_gem->name_table,
2686 				 global_name, sizeof(bo_gem->global_name),
2687 				 bo_gem);
2688 		}
2689 		pthread_mutex_unlock(&bufmgr_gem->lock);
2690 	}
2691 
2692 	*name = bo_gem->global_name;
2693 	return 0;
2694 }
2695 
2696 /**
2697  * Enables unlimited caching of buffer objects for reuse.
2698  *
2699  * This is potentially very memory expensive, as the cache at each bucket
2700  * size is only bounded by how many buffers of that size we've managed to have
2701  * in flight at once.
2702  */
2703 drm_public void
2704 drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2705 {
2706 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2707 
2708 	bufmgr_gem->bo_reuse = true;
2709 }
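
/* Illustrative sketch of typical initialisation; drm_intel_bufmgr_gem_init()
 * is the public constructor declared in intel_bufmgr.h, and the 16 KiB batch
 * size is only an example value.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */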
2710 
2711 /**
2712  * Disables implicit synchronisation before executing the bo
2713  *
2714  * This will cause rendering corruption unless you correctly manage explicit
2715  * fences for all rendering involving this buffer - including use by others.
2716  * Disabling the implicit serialisation is only required if that serialisation
2717  * is too coarse (for example, you have split the buffer into many
2718  * non-overlapping regions and are sharing the whole buffer between concurrent
2719  * independent command streams).
2720  *
2721  * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2722  * which can be checked using drm_intel_bufmgr_gem_can_disable_implicit_sync(),
2723  * or subsequent execbufs involving the bo will generate EINVAL.
2724  */
2725 drm_public void
2726 drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo)
2727 {
2728 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2729 
2730 	bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2731 }
2732 
2733 /**
2734  * Enables implicit synchronisation before executing the bo
2735  *
2736  * This is the default behaviour of the kernel, to wait upon prior writes
2737  * completing on the object before rendering with it, or to wait for prior
2738  * reads to complete before writing into the object.
2739  * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2740  * the kernel never to insert a stall before using the object. Then this
2741  * function can be used to restore the implicit sync before subsequent
2742  * rendering.
2743  */
2744 drm_public void
2745 drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo)
2746 {
2747 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2748 
2749 	bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2750 }
2751 
2752 /**
2753  * Query whether the kernel supports disabling of its implicit synchronisation
2754  * before execbuf. See drm_intel_gem_bo_disable_implicit_sync()
2755  */
2756 drm_public int
2757 drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr)
2758 {
2759 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2760 
2761 	return bufmgr_gem->has_exec_async;
2762 }
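
/* Illustrative sketch: only disable implicit synchronisation when the kernel
 * advertises support, then rely on explicit fences for ordering. "batch_bo",
 * "ctx" and "used" are hypothetical caller state.
 *
 *	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_intel_gem_bo_disable_implicit_sync(bo);
 *
 *	int out_fence = -1;
 *	drm_intel_gem_bo_fence_exec(batch_bo, ctx, used, -1, &out_fence,
 *				    I915_EXEC_RENDER);
 *	// out_fence can now be waited on or handed to another driver.
 */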
2763 
2764 /**
2765  * Enable use of fenced reloc type.
2766  *
2767  * New code should enable this to avoid unnecessary fence register
2768  * allocation.  If this option is not enabled, all relocs will have a fence
2769  * register allocated.
2770  */
2771 drm_public void
2772 drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2773 {
2774 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2775 	bufmgr_gem->fenced_relocs = true;
2776 }
2777 
2778 /**
2779  * Return the additional aperture space required by the tree of buffer objects
2780  * rooted at bo.
2781  */
2782 static int
2783 drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2784 {
2785 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2786 	int i;
2787 	int total = 0;
2788 
2789 	if (bo == NULL || bo_gem->included_in_check_aperture)
2790 		return 0;
2791 
2792 	total += bo->size;
2793 	bo_gem->included_in_check_aperture = true;
2794 
2795 	for (i = 0; i < bo_gem->reloc_count; i++)
2796 		total +=
2797 		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2798 							reloc_target_info[i].bo);
2799 
2800 	return total;
2801 }
2802 
2803 /**
2804  * Count the number of buffers in this list that need a fence reg
2805  *
2806  * If the count is greater than the number of available regs, we'll have
2807  * to ask the caller to resubmit a batch with fewer tiled buffers.
2808  *
2809  * This function over-counts if the same buffer is used multiple times.
2810  */
2811 static unsigned int
2812 drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2813 {
2814 	int i;
2815 	unsigned int total = 0;
2816 
2817 	for (i = 0; i < count; i++) {
2818 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2819 
2820 		if (bo_gem == NULL)
2821 			continue;
2822 
2823 		total += bo_gem->reloc_tree_fences;
2824 	}
2825 	return total;
2826 }
2827 
2828 /**
2829  * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2830  * for the next drm_intel_bufmgr_check_aperture_space() call.
2831  */
2832 static void
2833 drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2834 {
2835 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2836 	int i;
2837 
2838 	if (bo == NULL || !bo_gem->included_in_check_aperture)
2839 		return;
2840 
2841 	bo_gem->included_in_check_aperture = false;
2842 
2843 	for (i = 0; i < bo_gem->reloc_count; i++)
2844 		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2845 							   reloc_target_info[i].bo);
2846 }
2847 
2848 /**
2849  * Return a conservative estimate for the amount of aperture required
2850  * for a collection of buffers. This may double-count some buffers.
2851  */
2852 static unsigned int
2853 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2854 {
2855 	int i;
2856 	unsigned int total = 0;
2857 
2858 	for (i = 0; i < count; i++) {
2859 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2860 		if (bo_gem != NULL)
2861 			total += bo_gem->reloc_tree_size;
2862 	}
2863 	return total;
2864 }
2865 
2866 /**
2867  * Return the amount of aperture needed for a collection of buffers.
2868  * This avoids double counting any buffers, at the cost of looking
2869  * at every buffer in the set.
2870  */
2871 static unsigned int
2872 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2873 {
2874 	int i;
2875 	unsigned int total = 0;
2876 
2877 	for (i = 0; i < count; i++) {
2878 		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2879 		/* For the first buffer object in the array, we get an
2880 		 * accurate count back for its reloc_tree size (since nothing
2881 		 * had been flagged as being counted yet).  We can save that
2882 		 * value out as a more conservative reloc_tree_size that
2883 		 * avoids double-counting target buffers.  Since the first
2884 		 * buffer happens to usually be the batch buffer in our
2885 		 * callers, this can pull us back from doing the tree
2886 		 * walk on every new batch emit.
2887 		 */
2888 		if (i == 0) {
2889 			drm_intel_bo_gem *bo_gem =
2890 			    (drm_intel_bo_gem *) bo_array[i];
2891 			bo_gem->reloc_tree_size = total;
2892 		}
2893 	}
2894 
2895 	for (i = 0; i < count; i++)
2896 		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2897 	return total;
2898 }
2899 
2900 /**
2901  * Return -1 if the batchbuffer should be flushed before attempting to
2902  * emit rendering referencing the buffers pointed to by bo_array.
2903  *
2904  * This is required because if we try to emit a batchbuffer with relocations
2905  * to a tree of buffers that won't simultaneously fit in the aperture,
2906  * the rendering will return an error at a point where the software is not
2907  * prepared to recover from it.
2908  *
2909  * However, we also want to emit the batchbuffer significantly before we reach
2910  * the limit: with a series of batchbuffers, each of which references buffers
2911  * covering almost all of the aperture, every emit ends up waiting to evict a
2912  * buffer from the previous rendering, and performance becomes effectively
2913  * synchronous.  By emitting smaller batchbuffers, we eat some CPU overhead to
2914  * get better parallelism.
2915  */
2916 static int
2917 drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2918 {
2919 	drm_intel_bufmgr_gem *bufmgr_gem =
2920 	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2921 	unsigned int total = 0;
2922 	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2923 	int total_fences;
2924 
2925 	/* Check for fence reg constraints if necessary */
2926 	if (bufmgr_gem->available_fences) {
2927 		total_fences = drm_intel_gem_total_fences(bo_array, count);
2928 		if (total_fences > bufmgr_gem->available_fences)
2929 			return -ENOSPC;
2930 	}
2931 
2932 	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2933 
2934 	if (total > threshold)
2935 		total = drm_intel_gem_compute_batch_space(bo_array, count);
2936 
2937 	if (total > threshold) {
2938 		DBG("check_space: overflowed available aperture, "
2939 		    "%dkb vs %dkb\n",
2940 		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2941 		return -ENOSPC;
2942 	} else {
2943 		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2944 		    (int)bufmgr_gem->gtt_size / 1024);
2945 		return 0;
2946 	}
2947 }
2948 
2949 /*
2950  * Disable buffer reuse for objects which are shared with the kernel
2951  * as scanout buffers
2952  */
2953 static int
2954 drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2955 {
2956 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2957 
2958 	bo_gem->reusable = false;
2959 	return 0;
2960 }
2961 
2962 static int
2963 drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2964 {
2965 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2966 
2967 	return bo_gem->reusable;
2968 }
2969 
2970 static int
2971 _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2972 {
2973 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2974 	int i;
2975 
2976 	for (i = 0; i < bo_gem->reloc_count; i++) {
2977 		if (bo_gem->reloc_target_info[i].bo == target_bo)
2978 			return 1;
2979 		if (bo == bo_gem->reloc_target_info[i].bo)
2980 			continue;
2981 		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2982 						target_bo))
2983 			return 1;
2984 	}
2985 
2986 	for (i = 0; i< bo_gem->softpin_target_count; i++) {
2987 		if (bo_gem->softpin_target[i] == target_bo)
2988 			return 1;
2989 		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2990 			return 1;
2991 	}
2992 
2993 	return 0;
2994 }
2995 
2996 /** Return true if target_bo is referenced by bo's relocation tree. */
2997 static int
2998 drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2999 {
3000 	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3001 
3002 	if (bo == NULL || target_bo == NULL)
3003 		return 0;
3004 	if (target_bo_gem->used_as_reloc_target)
3005 		return _drm_intel_gem_bo_references(bo, target_bo);
3006 	return 0;
3007 }
3008 
3009 static void
3010 add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3011 {
3012 	unsigned int i = bufmgr_gem->num_buckets;
3013 
3014 	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3015 
3016 	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3017 	bufmgr_gem->cache_bucket[i].size = size;
3018 	bufmgr_gem->num_buckets++;
3019 }
3020 
3021 static void
3022 init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3023 {
3024 	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3025 
3026 	/* OK, so power of two buckets was too wasteful of memory.
3027 	 * Give 3 other sizes between each power of two, to hopefully
3028 	 * cover things accurately enough.  (The alternative is
3029 	 * probably to just go for exact matching of sizes, and assume
3030 	 * that for things like composited window resize the tiled
3031 	 * width/height alignment and rounding of sizes to pages will
3032 	 * get us useful cache hit rates anyway)
3033 	 */
3034 	add_bucket(bufmgr_gem, 4096);
3035 	add_bucket(bufmgr_gem, 4096 * 2);
3036 	add_bucket(bufmgr_gem, 4096 * 3);
3037 
3038 	/* Initialize the linked lists for BO reuse cache. */
3039 	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3040 		add_bucket(bufmgr_gem, size);
3041 
3042 		add_bucket(bufmgr_gem, size + size * 1 / 4);
3043 		add_bucket(bufmgr_gem, size + size * 2 / 4);
3044 		add_bucket(bufmgr_gem, size + size * 3 / 4);
3045 	}
3046 }
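
/* For reference, the buckets created above are, in pages: 1, 2, 3, 4, 5, 6,
 * 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, ... i.e. every power
 * of two up to the 64 MiB cache_max_size, plus three evenly spaced sizes
 * between each power of two and the next.
 */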
3047 
3048 drm_public void
3049 drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3050 {
3051 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3052 
3053 	bufmgr_gem->vma_max = limit;
3054 
3055 	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3056 }
3057 
3058 static int
3059 parse_devid_override(const char *devid_override)
3060 {
3061 	static const struct {
3062 		const char *name;
3063 		int pci_id;
3064 	} name_map[] = {
3065 		{ "brw", PCI_CHIP_I965_GM },
3066 		{ "g4x", PCI_CHIP_GM45_GM },
3067 		{ "ilk", PCI_CHIP_ILD_G },
3068 		{ "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3069 		{ "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3070 		{ "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3071 		{ "byt", PCI_CHIP_VALLEYVIEW_3 },
3072 		{ "bdw", 0x1620 | BDW_ULX },
3073 		{ "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3074 		{ "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3075 	};
3076 	unsigned int i;
3077 
3078 	for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3079 		if (!strcmp(name_map[i].name, devid_override))
3080 			return name_map[i].pci_id;
3081 	}
3082 
3083 	return strtod(devid_override, NULL);
3084 }
3085 
3086 /**
3087  * Get the PCI ID for the device.  This can be overridden by setting the
3088  * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3089  */
3090 static int
3091 get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3092 {
3093 	char *devid_override;
3094 	int devid = 0;
3095 	int ret;
3096 	drm_i915_getparam_t gp;
3097 
3098 	if (geteuid() == getuid()) {
3099 		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3100 		if (devid_override) {
3101 			bufmgr_gem->no_exec = true;
3102 			return parse_devid_override(devid_override);
3103 		}
3104 	}
3105 
3106 	memclear(gp);
3107 	gp.param = I915_PARAM_CHIPSET_ID;
3108 	gp.value = &devid;
3109 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3110 	if (ret) {
3111 		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3112 		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3113 	}
3114 	return devid;
3115 }
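
/* Illustrative sketch: the override is typically used for dry runs against a
 * chipset that is not physically present, e.g.
 *
 *	$ INTEL_DEVID_OVERRIDE=skl ./my_test
 *
 * Setting the override also sets bufmgr_gem->no_exec, so execbuffers are
 * skipped in do_exec2() rather than being submitted to mismatched hardware.
 */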
3116 
3117 drm_public int
3118 drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3119 {
3120 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3121 
3122 	return bufmgr_gem->pci_device;
3123 }
3124 
3125 /**
3126  * Sets the AUB filename.
3127  *
3128  * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3129  * for it to have any effect.
3130  */
3131 drm_public void
3132 drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3133 				      const char *filename)
3134 {
3135 }
3136 
3137 /**
3138  * Sets up AUB dumping.
3139  *
3140  * This is a trace file format that can be used with the simulator.
3141  * Packets are emitted in a format somewhat like GPU command packets.
3142  * You can set up a GTT and upload your objects into the referenced
3143  * space, then send off batchbuffers and get BMPs out the other end.
3144  */
3145 drm_public void
3146 drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3147 {
3148 	fprintf(stderr, "libdrm aub dumping is deprecated.\n\n"
3149 		"Use intel_aubdump from intel-gpu-tools instead.  Install intel-gpu-tools,\n"
3150 		"then run (for example)\n\n"
3151 		"\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n"
3152 		"See the intel_aubdump man page for more details.\n");
3153 }
3154 
3155 drm_public drm_intel_context *
3156 drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3157 {
3158 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3159 	struct drm_i915_gem_context_create create;
3160 	drm_intel_context *context = NULL;
3161 	int ret;
3162 
3163 	context = calloc(1, sizeof(*context));
3164 	if (!context)
3165 		return NULL;
3166 
3167 	memclear(create);
3168 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3169 	if (ret != 0) {
3170 		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3171 		    strerror(errno));
3172 		free(context);
3173 		return NULL;
3174 	}
3175 
3176 	context->ctx_id = create.ctx_id;
3177 	context->bufmgr = bufmgr;
3178 
3179 	return context;
3180 }
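
/* Illustrative sketch of per-context submission (error handling elided;
 * "batch_bo" and "used" are hypothetical caller state):
 *
 *	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *
 *	drm_intel_gem_bo_context_exec(batch_bo, ctx, used, I915_EXEC_RENDER);
 *
 *	drm_intel_gem_context_destroy(ctx);
 */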
3181 
3182 drm_public int
3183 drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id)
3184 {
3185 	if (ctx == NULL)
3186 		return -EINVAL;
3187 
3188 	*ctx_id = ctx->ctx_id;
3189 
3190 	return 0;
3191 }
3192 
3193 drm_public void
3194 drm_intel_gem_context_destroy(drm_intel_context *ctx)
3195 {
3196 	drm_intel_bufmgr_gem *bufmgr_gem;
3197 	struct drm_i915_gem_context_destroy destroy;
3198 	int ret;
3199 
3200 	if (ctx == NULL)
3201 		return;
3202 
3203 	memclear(destroy);
3204 
3205 	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3206 	destroy.ctx_id = ctx->ctx_id;
3207 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3208 		       &destroy);
3209 	if (ret != 0)
3210 		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3211 			strerror(errno));
3212 
3213 	free(ctx);
3214 }
3215 
3216 drm_public int
3217 drm_intel_get_reset_stats(drm_intel_context *ctx,
3218 			  uint32_t *reset_count,
3219 			  uint32_t *active,
3220 			  uint32_t *pending)
3221 {
3222 	drm_intel_bufmgr_gem *bufmgr_gem;
3223 	struct drm_i915_reset_stats stats;
3224 	int ret;
3225 
3226 	if (ctx == NULL)
3227 		return -EINVAL;
3228 
3229 	memclear(stats);
3230 
3231 	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3232 	stats.ctx_id = ctx->ctx_id;
3233 	ret = drmIoctl(bufmgr_gem->fd,
3234 		       DRM_IOCTL_I915_GET_RESET_STATS,
3235 		       &stats);
3236 	if (ret == 0) {
3237 		if (reset_count != NULL)
3238 			*reset_count = stats.reset_count;
3239 
3240 		if (active != NULL)
3241 			*active = stats.batch_active;
3242 
3243 		if (pending != NULL)
3244 			*pending = stats.batch_pending;
3245 	}
3246 
3247 	return ret;
3248 }
3249 
3250 drm_public int
3251 drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3252 		   uint32_t offset,
3253 		   uint64_t *result)
3254 {
3255 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3256 	struct drm_i915_reg_read reg_read;
3257 	int ret;
3258 
3259 	memclear(reg_read);
3260 	reg_read.offset = offset;
3261 
3262 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3263 
3264 	*result = reg_read.val;
3265 	return ret;
3266 }
3267 
3268 drm_public int
3269 drm_intel_get_subslice_total(int fd, unsigned int *subslice_total)
3270 {
3271 	drm_i915_getparam_t gp;
3272 	int ret;
3273 
3274 	memclear(gp);
3275 	gp.value = (int*)subslice_total;
3276 	gp.param = I915_PARAM_SUBSLICE_TOTAL;
3277 	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3278 	if (ret)
3279 		return -errno;
3280 
3281 	return 0;
3282 }
3283 
3284 drm_public int
3285 drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3286 {
3287 	drm_i915_getparam_t gp;
3288 	int ret;
3289 
3290 	memclear(gp);
3291 	gp.value = (int*)eu_total;
3292 	gp.param = I915_PARAM_EU_TOTAL;
3293 	ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3294 	if (ret)
3295 		return -errno;
3296 
3297 	return 0;
3298 }
3299 
3300 drm_public int
3301 drm_intel_get_pooled_eu(int fd)
3302 {
3303 	drm_i915_getparam_t gp;
3304 	int ret = -1;
3305 
3306 	memclear(gp);
3307 	gp.param = I915_PARAM_HAS_POOLED_EU;
3308 	gp.value = &ret;
3309 	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3310 		return -errno;
3311 
3312 	return ret;
3313 }
3314 
3315 drm_public int
3316 drm_intel_get_min_eu_in_pool(int fd)
3317 {
3318 	drm_i915_getparam_t gp;
3319 	int ret = -1;
3320 
3321 	memclear(gp);
3322 	gp.param = I915_PARAM_MIN_EU_IN_POOL;
3323 	gp.value = &ret;
3324 	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3325 		return -errno;
3326 
3327 	return ret;
3328 }
3329 
3330 /**
3331  * Annotate the given bo for use in aub dumping.
3332  *
3333  * \param annotations is an array of drm_intel_aub_annotation objects
3334  * describing the type of data in various sections of the bo.  Each
3335  * element of the array specifies the type and subtype of a section of
3336  * the bo, and the past-the-end offset of that section.  The elements
3337  * of \c annotations must be sorted so that ending_offset is
3338  * increasing.
3339  *
3340  * \param count is the number of elements in the \c annotations array.
3341  * If \c count is zero, then \c annotations will not be dereferenced.
3342  *
3343  * Annotations are copied into a private data structure, so caller may
3344  * re-use the memory pointed to by \c annotations after the call
3345  * returns.
3346  *
3347  * Annotations are stored for the lifetime of the bo; to reset to the
3348  * default state (no annotations), call this function with a \c count
3349  * of zero.
3350  */
3351 drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3352 					 drm_intel_aub_annotation *annotations,
3353 					 unsigned count)
3354 {
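	/* Aub dump support has been removed from libdrm; this entry point is
	 * intentionally a no-op, kept only so that existing callers continue
	 * to link against it.
	 */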
3355 }
3356 
3357 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3358 static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3359 
3360 static drm_intel_bufmgr_gem *
3361 drm_intel_bufmgr_gem_find(int fd)
3362 {
3363 	drm_intel_bufmgr_gem *bufmgr_gem;
3364 
3365 	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3366 		if (bufmgr_gem->fd == fd) {
3367 			atomic_inc(&bufmgr_gem->refcount);
3368 			return bufmgr_gem;
3369 		}
3370 	}
3371 
3372 	return NULL;
3373 }
3374 
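/*
 * Drops a reference on the buffer manager.  The atomic_add_unless() fast
 * path avoids taking bufmgr_list_mutex unless this might be the last
 * reference; the count is then re-checked under the lock so the bufmgr can
 * be unlinked from bufmgr_list before it is destroyed.
 */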
3375 static void
3376 drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3377 {
3378 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3379 
3380 	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3381 		pthread_mutex_lock(&bufmgr_list_mutex);
3382 
3383 		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3384 			DRMLISTDEL(&bufmgr_gem->managers);
3385 			drm_intel_bufmgr_gem_destroy(bufmgr);
3386 		}
3387 
3388 		pthread_mutex_unlock(&bufmgr_list_mutex);
3389 	}
3390 }
3391 
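/**
 * Returns a cached GTT mapping of \c bo, creating it on first use via
 * DRM_IOCTL_I915_GEM_MMAP_GTT plus a drm_mmap() of the returned fake
 * offset.  Returns NULL for userptr objects and on mapping failure.
 */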
3392 drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo)
3393 {
3394 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3395 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3396 
3397 	if (bo_gem->gtt_virtual)
3398 		return bo_gem->gtt_virtual;
3399 
3400 	if (bo_gem->is_userptr)
3401 		return NULL;
3402 
3403 	pthread_mutex_lock(&bufmgr_gem->lock);
3404 	if (bo_gem->gtt_virtual == NULL) {
3405 		struct drm_i915_gem_mmap_gtt mmap_arg;
3406 		void *ptr;
3407 
3408 		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3409 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3410 
3411 		if (bo_gem->map_count++ == 0)
3412 			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3413 
3414 		memclear(mmap_arg);
3415 		mmap_arg.handle = bo_gem->gem_handle;
3416 
3417 		/* Get the fake offset back... */
3418 		ptr = MAP_FAILED;
3419 		if (drmIoctl(bufmgr_gem->fd,
3420 			     DRM_IOCTL_I915_GEM_MMAP_GTT,
3421 			     &mmap_arg) == 0) {
3422 			/* and mmap it */
3423 			ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3424 				       MAP_SHARED, bufmgr_gem->fd,
3425 				       mmap_arg.offset);
3426 		}
3427 		if (ptr == MAP_FAILED) {
3428 			if (--bo_gem->map_count == 0)
3429 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3430 			ptr = NULL;
3431 		}
3432 
3433 		bo_gem->gtt_virtual = ptr;
3434 	}
3435 	pthread_mutex_unlock(&bufmgr_gem->lock);
3436 
3437 	return bo_gem->gtt_virtual;
3438 }
3439 
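/**
 * Returns a cached CPU mapping of \c bo, creating it on first use via
 * DRM_IOCTL_I915_GEM_MMAP.  For userptr objects the original user pointer
 * is returned instead.  Returns NULL on mapping failure.
 */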
3440 drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo)
3441 {
3442 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3443 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3444 
3445 	if (bo_gem->mem_virtual)
3446 		return bo_gem->mem_virtual;
3447 
3448 	if (bo_gem->is_userptr) {
3449 		/* Return the same user ptr */
3450 		return bo_gem->user_virtual;
3451 	}
3452 
3453 	pthread_mutex_lock(&bufmgr_gem->lock);
3454 	if (!bo_gem->mem_virtual) {
3455 		struct drm_i915_gem_mmap mmap_arg;
3456 
3457 		if (bo_gem->map_count++ == 0)
3458 			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3459 
3460 		DBG("bo_map: %d (%s), map_count=%d\n",
3461 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3462 
3463 		memclear(mmap_arg);
3464 		mmap_arg.handle = bo_gem->gem_handle;
3465 		mmap_arg.size = bo->size;
3466 		if (drmIoctl(bufmgr_gem->fd,
3467 			     DRM_IOCTL_I915_GEM_MMAP,
3468 			     &mmap_arg)) {
3469 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3470 			    __FILE__, __LINE__, bo_gem->gem_handle,
3471 			    bo_gem->name, strerror(errno));
3472 			if (--bo_gem->map_count == 0)
3473 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3474 		} else {
3475 			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3476 			bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3477 		}
3478 	}
3479 	pthread_mutex_unlock(&bufmgr_gem->lock);
3480 
3481 	return bo_gem->mem_virtual;
3482 }
3483 
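/**
 * Returns a cached write-combining CPU mapping of \c bo, creating it on
 * first use via DRM_IOCTL_I915_GEM_MMAP with I915_MMAP_WC.  Returns NULL
 * for userptr objects and on mapping failure.
 */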
3484 drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo)
3485 {
3486 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
3487 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3488 
3489 	if (bo_gem->wc_virtual)
3490 		return bo_gem->wc_virtual;
3491 
3492 	if (bo_gem->is_userptr)
3493 		return NULL;
3494 
3495 	pthread_mutex_lock(&bufmgr_gem->lock);
3496 	if (!bo_gem->wc_virtual) {
3497 		struct drm_i915_gem_mmap mmap_arg;
3498 
3499 		if (bo_gem->map_count++ == 0)
3500 			drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
3501 
3502 		DBG("bo_map: %d (%s), map_count=%d\n",
3503 		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3504 
3505 		memclear(mmap_arg);
3506 		mmap_arg.handle = bo_gem->gem_handle;
3507 		mmap_arg.size = bo->size;
3508 		mmap_arg.flags = I915_MMAP_WC;
3509 		if (drmIoctl(bufmgr_gem->fd,
3510 			     DRM_IOCTL_I915_GEM_MMAP,
3511 			     &mmap_arg)) {
3512 			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3513 			    __FILE__, __LINE__, bo_gem->gem_handle,
3514 			    bo_gem->name, strerror(errno));
3515 			if (--bo_gem->map_count == 0)
3516 				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
3517 		} else {
3518 			VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3519 			bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3520 		}
3521 	}
3522 	pthread_mutex_unlock(&bufmgr_gem->lock);
3523 
3524 	return bo_gem->wc_virtual;
3525 }
3526 
3527 /**
3528  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3529  * and manage buffer objects.
3530  *
3531  * \param fd File descriptor of the opened DRM device.
3532  */
3533 drm_public drm_intel_bufmgr *
3534 drm_intel_bufmgr_gem_init(int fd, int batch_size)
3535 {
3536 	drm_intel_bufmgr_gem *bufmgr_gem;
3537 	struct drm_i915_gem_get_aperture aperture;
3538 	drm_i915_getparam_t gp;
3539 	int ret, tmp;
3540 
3541 	pthread_mutex_lock(&bufmgr_list_mutex);
3542 
3543 	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3544 	if (bufmgr_gem)
3545 		goto exit;
3546 
3547 	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3548 	if (bufmgr_gem == NULL)
3549 		goto exit;
3550 
3551 	bufmgr_gem->fd = fd;
3552 	atomic_set(&bufmgr_gem->refcount, 1);
3553 
3554 	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3555 		free(bufmgr_gem);
3556 		bufmgr_gem = NULL;
3557 		goto exit;
3558 	}
3559 
3560 	memclear(aperture);
3561 	ret = drmIoctl(bufmgr_gem->fd,
3562 		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3563 		       &aperture);
3564 
3565 	if (ret == 0)
3566 		bufmgr_gem->gtt_size = aperture.aper_available_size;
3567 	else {
3568 		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3569 			strerror(errno));
3570 		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3571 		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3572 			"May lead to reduced performance or incorrect "
3573 			"rendering.\n",
3574 			(int)bufmgr_gem->gtt_size / 1024);
3575 	}
3576 
3577 	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3578 
3579 	if (IS_GEN2(bufmgr_gem->pci_device))
3580 		bufmgr_gem->gen = 2;
3581 	else if (IS_GEN3(bufmgr_gem->pci_device))
3582 		bufmgr_gem->gen = 3;
3583 	else if (IS_GEN4(bufmgr_gem->pci_device))
3584 		bufmgr_gem->gen = 4;
3585 	else if (IS_GEN5(bufmgr_gem->pci_device))
3586 		bufmgr_gem->gen = 5;
3587 	else if (IS_GEN6(bufmgr_gem->pci_device))
3588 		bufmgr_gem->gen = 6;
3589 	else if (IS_GEN7(bufmgr_gem->pci_device))
3590 		bufmgr_gem->gen = 7;
3591 	else if (IS_GEN8(bufmgr_gem->pci_device))
3592 		bufmgr_gem->gen = 8;
3593 	else if (!intel_get_genx(bufmgr_gem->pci_device, &bufmgr_gem->gen)) {
3594 		free(bufmgr_gem);
3595 		bufmgr_gem = NULL;
3596 		goto exit;
3597 	}
3598 
3599 	if (IS_GEN3(bufmgr_gem->pci_device) &&
3600 	    bufmgr_gem->gtt_size > 256*1024*1024) {
3601 		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3602 		 * be used for tiled blits. To simplify the accounting, just
3603 		 * subtract the unmappable part (fixed to 256MB on all known
3604 		 * gen3 devices) if the kernel advertises it. */
3605 		bufmgr_gem->gtt_size -= 256*1024*1024;
3606 	}
3607 
3608 	memclear(gp);
3609 	gp.value = &tmp;
3610 
3611 	gp.param = I915_PARAM_HAS_EXECBUF2;
3612 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3613 	if (ret) {
3614 		fprintf(stderr, "i915 does not support EXECBUFFER2\n");
3615 		free(bufmgr_gem);
3616 		bufmgr_gem = NULL;
3617 		goto exit;
3618 	}
3619 
3620 	gp.param = I915_PARAM_HAS_BSD;
3621 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3622 	bufmgr_gem->has_bsd = ret == 0;
3623 
3624 	gp.param = I915_PARAM_HAS_BLT;
3625 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3626 	bufmgr_gem->has_blt = ret == 0;
3627 
3628 	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3629 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3630 	bufmgr_gem->has_relaxed_fencing = ret == 0;
3631 
3632 	gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3633 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3634 	bufmgr_gem->has_exec_async = ret == 0;
3635 
3636 	bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3637 
3638 	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3639 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3640 	bufmgr_gem->has_wait_timeout = ret == 0;
3641 
3642 	gp.param = I915_PARAM_HAS_LLC;
3643 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3644 	if (ret != 0) {
3645 		/* Kernel does not support the HAS_LLC query; fall back to
3646 		 * GPU generation detection and assume that GEN6/7 have LLC.
3647 		 */
3648 		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
3649 				       IS_GEN7(bufmgr_gem->pci_device));
3650 	} else
3651 		bufmgr_gem->has_llc = *gp.value;
3652 
3653 	gp.param = I915_PARAM_HAS_VEBOX;
3654 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3655 	bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3656 
3657 	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3658 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3659 	if (ret == 0 && *gp.value > 0)
3660 		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3661 
3662 	if (bufmgr_gem->gen < 4) {
3663 		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3664 		gp.value = &bufmgr_gem->available_fences;
3665 		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3666 		if (ret) {
3667 			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3668 				errno);
3669 			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3670 				*gp.value);
3671 			bufmgr_gem->available_fences = 0;
3672 		} else {
3673 			/* XXX The kernel reports the total number of fences,
3674 			 * including any that may be pinned.
3675 			 *
3676 			 * We presume that there will be at least one pinned
3677 			 * fence for the scanout buffer, but there may be more
3678 			 * than one scanout and the user may be manually
3679 			 * pinning buffers. Let's move to execbuffer2 and
3680 			 * thereby forget the insanity of using fences...
3681 			 */
3682 			bufmgr_gem->available_fences -= 2;
3683 			if (bufmgr_gem->available_fences < 0)
3684 				bufmgr_gem->available_fences = 0;
3685 		}
3686 	}
3687 
3688 	if (bufmgr_gem->gen >= 8) {
3689 		gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3690 		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3691 		if (ret == 0 && *gp.value == 3)
3692 			bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3693 	}
3694 
3695 	/* Let's go with one relocation for every two dwords (but round down a bit
3696 	 * since a power of two will mean an extra page allocation for the reloc
3697 	 * buffer).
3698 	 *
3699 	 * Every 4 was too few for the blender benchmark.
3700 	 */
3701 	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
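	/* For example, with an assumed (not required) 16 KiB batch this
	 * works out to 16384 / 4 / 2 - 2 = 2046 relocations.
	 */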
3702 
3703 	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3704 	bufmgr_gem->bufmgr.bo_alloc_for_render =
3705 	    drm_intel_gem_bo_alloc_for_render;
3706 	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3707 	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3708 	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3709 	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3710 	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3711 	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3712 	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3713 	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3714 	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3715 	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3716 	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3717 	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3718 	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3719 	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3720 	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3721 	bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3722 	bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3723 	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3724 	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3725 	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3726 	bufmgr_gem->bufmgr.debug = 0;
3727 	bufmgr_gem->bufmgr.check_aperture_space =
3728 	    drm_intel_gem_check_aperture_space;
3729 	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3730 	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3731 	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3732 	    drm_intel_gem_get_pipe_from_crtc_id;
3733 	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3734 
3735 	init_cache_buckets(bufmgr_gem);
3736 
3737 	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3738 	bufmgr_gem->vma_max = -1; /* unlimited by default */
3739 
3740 	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3741 
3742 exit:
3743 	pthread_mutex_unlock(&bufmgr_list_mutex);
3744 
3745 	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3746 }
3747
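
/*
 * Illustrative usage sketch (not part of this file): the caller is expected
 * to open the DRM device itself and hand the file descriptor to
 * drm_intel_bufmgr_gem_init(); the batch size and buffer parameters shown
 * here are assumptions, not requirements.
 *
 *	int fd = drmOpen("i915", NULL);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *	...
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 *	drmClose(fd);
 */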