• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifdef DRV_I915
8 
9 #include <assert.h>
10 #include <errno.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <unistd.h>
16 #include <xf86drm.h>
17 
18 #include "drv_helpers.h"
19 #include "drv_priv.h"
20 #include "external/i915_drm.h"
21 #include "util.h"
22 
23 #define I915_CACHELINE_SIZE 64
24 #define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)
25 
26 static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
27 						   DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
28 						   DRM_FORMAT_RGB565,	   DRM_FORMAT_XBGR2101010,
29 						   DRM_FORMAT_XBGR8888,	   DRM_FORMAT_XRGB2101010,
30 						   DRM_FORMAT_XRGB8888 };
31 
32 static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };
33 
34 static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
35 						 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };
36 
37 static const uint64_t gen_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Y_TILED,
38 					       I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR };
39 
40 static const uint64_t gen12_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
41 						 I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
42 						 DRM_FORMAT_MOD_LINEAR };
43 
44 static const uint64_t gen11_modifier_order[] = { I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
45 						 DRM_FORMAT_MOD_LINEAR };
46 
47 static const uint64_t xe_lpdp_modifier_order[] = { I915_FORMAT_MOD_4_TILED_MTL_RC_CCS,
48 						   I915_FORMAT_MOD_4_TILED, I915_FORMAT_MOD_X_TILED,
49 						   DRM_FORMAT_MOD_LINEAR };
50 
51 struct modifier_support_t {
52 	const uint64_t *order;
53 	uint32_t count;
54 };
55 
56 struct i915_device {
57 	uint32_t graphics_version;
58 	int32_t has_llc;
59 	int32_t has_hw_protection;
60 	struct modifier_support_t modifier;
61 	int device_id;
62 	bool is_xelpd;
63 	/*TODO : cleanup is_mtl to avoid adding variables for every new platforms */
64 	bool is_mtl;
65 	int32_t num_fences_avail;
66 	bool has_mmap_offset;
67 };
68 
i915_info_from_device_id(struct i915_device * i915)69 static void i915_info_from_device_id(struct i915_device *i915)
70 {
71 	const uint16_t gen3_ids[] = { 0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE,
72 				      0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011 };
73 	const uint16_t gen4_ids[] = { 0x29A2, 0x2992, 0x2982, 0x2972, 0x2A02, 0x2A12, 0x2A42,
74 				      0x2E02, 0x2E12, 0x2E22, 0x2E32, 0x2E42, 0x2E92 };
75 	const uint16_t gen5_ids[] = { 0x0042, 0x0046 };
76 	const uint16_t gen6_ids[] = { 0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010A };
77 	const uint16_t gen7_ids[] = {
78 		0x0152, 0x0162, 0x0156, 0x0166, 0x015a, 0x016a, 0x0402, 0x0412, 0x0422,
79 		0x0406, 0x0416, 0x0426, 0x040A, 0x041A, 0x042A, 0x040B, 0x041B, 0x042B,
80 		0x040E, 0x041E, 0x042E, 0x0C02, 0x0C12, 0x0C22, 0x0C06, 0x0C16, 0x0C26,
81 		0x0C0A, 0x0C1A, 0x0C2A, 0x0C0B, 0x0C1B, 0x0C2B, 0x0C0E, 0x0C1E, 0x0C2E,
82 		0x0A02, 0x0A12, 0x0A22, 0x0A06, 0x0A16, 0x0A26, 0x0A0A, 0x0A1A, 0x0A2A,
83 		0x0A0B, 0x0A1B, 0x0A2B, 0x0A0E, 0x0A1E, 0x0A2E, 0x0D02, 0x0D12, 0x0D22,
84 		0x0D06, 0x0D16, 0x0D26, 0x0D0A, 0x0D1A, 0x0D2A, 0x0D0B, 0x0D1B, 0x0D2B,
85 		0x0D0E, 0x0D1E, 0x0D2E, 0x0F31, 0x0F32, 0x0F33, 0x0157, 0x0155
86 	};
87 	const uint16_t gen8_ids[] = { 0x22B0, 0x22B1, 0x22B2, 0x22B3, 0x1602, 0x1606,
88 				      0x160A, 0x160B, 0x160D, 0x160E, 0x1612, 0x1616,
89 				      0x161A, 0x161B, 0x161D, 0x161E, 0x1622, 0x1626,
90 				      0x162A, 0x162B, 0x162D, 0x162E };
91 	const uint16_t gen9_ids[] = {
92 		0x1902, 0x1906, 0x190A, 0x190B, 0x190E, 0x1912, 0x1913, 0x1915, 0x1916, 0x1917,
93 		0x191A, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, 0x1926, 0x1927, 0x192A, 0x192B,
94 		0x192D, 0x1932, 0x193A, 0x193B, 0x193D, 0x0A84, 0x1A84, 0x1A85, 0x5A84, 0x5A85,
95 		0x3184, 0x3185, 0x5902, 0x5906, 0x590A, 0x5908, 0x590B, 0x590E, 0x5913, 0x5915,
96 		0x5917, 0x5912, 0x5916, 0x591A, 0x591B, 0x591D, 0x591E, 0x5921, 0x5923, 0x5926,
97 		0x5927, 0x593B, 0x591C, 0x87C0, 0x87CA, 0x3E90, 0x3E93, 0x3E99, 0x3E9C, 0x3E91,
98 		0x3E92, 0x3E96, 0x3E98, 0x3E9A, 0x3E9B, 0x3E94, 0x3EA9, 0x3EA5, 0x3EA6, 0x3EA7,
99 		0x3EA8, 0x3EA1, 0x3EA4, 0x3EA0, 0x3EA3, 0x3EA2, 0x9B21, 0x9BA0, 0x9BA2, 0x9BA4,
100 		0x9BA5, 0x9BA8, 0x9BAA, 0x9BAB, 0x9BAC, 0x9B41, 0x9BC0, 0x9BC2, 0x9BC4, 0x9BC5,
101 		0x9BC6, 0x9BC8, 0x9BCA, 0x9BCB, 0x9BCC, 0x9BE6, 0x9BF6
102 	};
103 	const uint16_t gen11_ids[] = { 0x8A50, 0x8A51, 0x8A52, 0x8A53, 0x8A54, 0x8A56, 0x8A57,
104 				       0x8A58, 0x8A59, 0x8A5A, 0x8A5B, 0x8A5C, 0x8A5D, 0x8A71,
105 				       0x4500, 0x4541, 0x4551, 0x4555, 0x4557, 0x4571, 0x4E51,
106 				       0x4E55, 0x4E57, 0x4E61, 0x4E71 };
107 	const uint16_t gen12_ids[] = {
108 		0x4c8a, 0x4c8b, 0x4c8c, 0x4c90, 0x4c9a, 0x4680, 0x4681, 0x4682, 0x4683, 0x4688,
109 		0x4689, 0x4690, 0x4691, 0x4692, 0x4693, 0x4698, 0x4699, 0x4626, 0x4628, 0x462a,
110 		0x46a0, 0x46a1, 0x46a2, 0x46a3, 0x46a6, 0x46a8, 0x46aa, 0x46b0, 0x46b1, 0x46b2,
111 		0x46b3, 0x46c0, 0x46c1, 0x46c2, 0x46c3, 0x9A40, 0x9A49, 0x9A59, 0x9A60, 0x9A68,
112 		0x9A70, 0x9A78, 0x9AC0, 0x9AC9, 0x9AD9, 0x9AF8, 0x4905, 0x4906, 0x4907, 0x4908
113 	};
114 	const uint16_t adlp_ids[] = { 0x46A0, 0x46A1, 0x46A2, 0x46A3, 0x46A6, 0x46A8,
115 				      0x46AA, 0x462A, 0x4626, 0x4628, 0x46B0, 0x46B1,
116 				      0x46B2, 0x46B3, 0x46C0, 0x46C1, 0x46C2, 0x46C3,
117 				      0x46D0, 0x46D1, 0x46D2, 0x46D3, 0x46D4 };
118 
119 	const uint16_t rplp_ids[] = { 0xA720, 0xA721, 0xA7A0, 0xA7A1, 0xA7A8,
120 				      0xA7A9, 0xA7AA, 0xA7AB, 0xA7AC, 0xA7AD };
121 
122 	const uint16_t mtl_ids[] = { 0x7D40, 0x7D60, 0x7D45, 0x7D55, 0x7DD5 };
123 
124 	unsigned i;
125 	i915->graphics_version = 4;
126 	i915->is_xelpd = false;
127 	i915->is_mtl = false;
128 
129 	for (i = 0; i < ARRAY_SIZE(gen3_ids); i++)
130 		if (gen3_ids[i] == i915->device_id)
131 			i915->graphics_version = 3;
132 
133 	/* Gen 4 */
134 	for (i = 0; i < ARRAY_SIZE(gen4_ids); i++)
135 		if (gen4_ids[i] == i915->device_id)
136 			i915->graphics_version = 4;
137 
138 	/* Gen 5 */
139 	for (i = 0; i < ARRAY_SIZE(gen5_ids); i++)
140 		if (gen5_ids[i] == i915->device_id)
141 			i915->graphics_version = 5;
142 
143 	/* Gen 6 */
144 	for (i = 0; i < ARRAY_SIZE(gen6_ids); i++)
145 		if (gen6_ids[i] == i915->device_id)
146 			i915->graphics_version = 6;
147 
148 	/* Gen 7 */
149 	for (i = 0; i < ARRAY_SIZE(gen7_ids); i++)
150 		if (gen7_ids[i] == i915->device_id)
151 			i915->graphics_version = 7;
152 
153 	/* Gen 8 */
154 	for (i = 0; i < ARRAY_SIZE(gen8_ids); i++)
155 		if (gen8_ids[i] == i915->device_id)
156 			i915->graphics_version = 8;
157 
158 	/* Gen 9 */
159 	for (i = 0; i < ARRAY_SIZE(gen9_ids); i++)
160 		if (gen9_ids[i] == i915->device_id)
161 			i915->graphics_version = 9;
162 
163 	/* Gen 11 */
164 	for (i = 0; i < ARRAY_SIZE(gen11_ids); i++)
165 		if (gen11_ids[i] == i915->device_id)
166 			i915->graphics_version = 11;
167 
168 	/* Gen 12 */
169 	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++)
170 		if (gen12_ids[i] == i915->device_id)
171 			i915->graphics_version = 12;
172 
173 	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++)
174 		if (adlp_ids[i] == i915->device_id) {
175 			i915->is_xelpd = true;
176 			i915->graphics_version = 12;
177 		}
178 
179 	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++)
180 		if (rplp_ids[i] == i915->device_id) {
181 			i915->is_xelpd = true;
182 			i915->graphics_version = 12;
183 		}
184 
185 	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++)
186 		if (mtl_ids[i] == i915->device_id) {
187 			i915->graphics_version = 12;
188 			i915->is_mtl = true;
189 		}
190 }
191 
i915_get_modifier_order(struct i915_device * i915)192 static void i915_get_modifier_order(struct i915_device *i915)
193 {
194 	if (i915->is_mtl) {
195 		i915->modifier.order = xe_lpdp_modifier_order;
196 		i915->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
197 	} else if (i915->graphics_version == 12) {
198 		i915->modifier.order = gen12_modifier_order;
199 		i915->modifier.count = ARRAY_SIZE(gen12_modifier_order);
200 	} else if (i915->graphics_version == 11) {
201 		i915->modifier.order = gen11_modifier_order;
202 		i915->modifier.count = ARRAY_SIZE(gen11_modifier_order);
203 	} else {
204 		i915->modifier.order = gen_modifier_order;
205 		i915->modifier.count = ARRAY_SIZE(gen_modifier_order);
206 	}
207 }
208 
/* Returns |current_flags| with every bit set in |mask| cleared. */
static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	return current_flags & ~mask;
}
214 
/*
 * Registers every (format, layout-modifier, usage) combination this driver
 * advertises: first all linear layouts, then X-tiled, then either 4-tiled
 * (MTL) or Y-tiled layouts. Always returns 0.
 */
static int i915_add_combinations(struct driver *drv)
{
	struct i915_device *i915 = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    i915->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	/* Usages that require a linear (untiled) layout; removed below for tiled entries. */
	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = I915_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* P010 linear can be used for scanout too. */
	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata_linear, BO_USE_SCANOUT);

	/*
	 * Android also frequently requests YV12 formats for some camera implementations
	 * (including the external provider implementation).
	 */
	drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &metadata_linear,
			       BO_USE_CAMERA_WRITE);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	/* Tiled entries must not advertise the linear-only usages. */
	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;

	struct format_metadata metadata_x_tiled = { .tiling = I915_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	const uint64_t nv12_usage =
	    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
	/* P010 scanout is only advertised on gen 11+. */
	const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected |
				    (i915->graphics_version >= 11 ? BO_USE_SCANOUT : 0);

	if (i915->is_mtl) {
		/* MTL uses tile-4 instead of tile-Y. */
		struct format_metadata metadata_4_tiled = { .tiling = I915_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };

		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     scanout_and_render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = I915_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };

		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		/* Y-tiled scanout isn't available on old platforms so we add
		 * |scanout_render_formats| without that USE flag.
		 */
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     render_not_linear);
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
	}
	return 0;
}
321 
i915_align_dimensions(struct bo * bo,uint32_t format,uint32_t tiling,uint32_t * stride,uint32_t * aligned_height)322 static int i915_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
323 				 uint32_t *aligned_height)
324 {
325 	struct i915_device *i915 = bo->drv->priv;
326 	uint32_t horizontal_alignment;
327 	uint32_t vertical_alignment;
328 
329 	switch (tiling) {
330 	default:
331 	case I915_TILING_NONE:
332 		/*
333 		 * The Intel GPU doesn't need any alignment in linear mode,
334 		 * but libva requires the allocation stride to be aligned to
335 		 * 16 bytes and height to 4 rows. Further, we round up the
336 		 * horizontal alignment so that row start on a cache line (64
337 		 * bytes).
338 		 */
339 #ifdef LINEAR_ALIGN_256
340 		/*
341 		 * If we want to import these buffers to amdgpu they need to
342 		 * their match LINEAR_ALIGNED requirement of 256 byte alignement.
343 		 */
344 		horizontal_alignment = 256;
345 #else
346 		horizontal_alignment = 64;
347 #endif
348 
349 		/*
350 		 * For hardware video encoding buffers, we want to align to the size of a
351 		 * macroblock, because otherwise we will end up encoding uninitialized data.
352 		 * This can result in substantial quality degradations, especially on lower
353 		 * resolution videos, because this uninitialized data may be high entropy.
354 		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
355 		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
356 		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
357 		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
358 		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
359 		 * will cause the GPU to read out-of-bounds.
360 		 *
361 		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
362 		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use
363 		 * virtgpu_cross_domain backend which passes that flag through.
364 		 */
365 		if (bo->meta.use_flags & BO_USE_HW_VIDEO_ENCODER) {
366 			vertical_alignment = 8;
367 		} else if (format == DRM_FORMAT_R8 && *aligned_height == 1) {
368 			vertical_alignment = 1;
369 		} else {
370 			vertical_alignment = 4;
371 		}
372 
373 		break;
374 
375 	case I915_TILING_X:
376 		horizontal_alignment = 512;
377 		vertical_alignment = 8;
378 		break;
379 
380 	case I915_TILING_Y:
381 	case I915_TILING_4:
382 		if (i915->graphics_version == 3) {
383 			horizontal_alignment = 512;
384 			vertical_alignment = 8;
385 		} else {
386 			horizontal_alignment = 128;
387 			vertical_alignment = 32;
388 		}
389 		break;
390 	}
391 
392 	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
393 	if (i915->graphics_version > 3) {
394 		*stride = ALIGN(*stride, horizontal_alignment);
395 	} else {
396 		while (*stride > horizontal_alignment)
397 			horizontal_alignment <<= 1;
398 
399 		*stride = horizontal_alignment;
400 	}
401 
402 	if (i915->graphics_version <= 3 && *stride > 8192)
403 		return -EINVAL;
404 
405 	return 0;
406 }
407 
i915_clflush(void * start,size_t size)408 static void i915_clflush(void *start, size_t size)
409 {
410 	void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
411 	void *end = (void *)((uintptr_t)start + size);
412 
413 	__builtin_ia32_mfence();
414 	while (p < end) {
415 #if defined(__CLFLUSHOPT__)
416 		__builtin_ia32_clflushopt(p);
417 #else
418 		__builtin_ia32_clflush(p);
419 #endif
420 		p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
421 	}
422 	__builtin_ia32_mfence();
423 }
424 
i915_init(struct driver * drv)425 static int i915_init(struct driver *drv)
426 {
427 	int ret, val;
428 	struct i915_device *i915;
429 	drm_i915_getparam_t get_param = { 0 };
430 
431 	i915 = calloc(1, sizeof(*i915));
432 	if (!i915)
433 		return -ENOMEM;
434 
435 	get_param.param = I915_PARAM_CHIPSET_ID;
436 	get_param.value = &(i915->device_id);
437 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
438 	if (ret) {
439 		drv_loge("Failed to get I915_PARAM_CHIPSET_ID\n");
440 		free(i915);
441 		return -EINVAL;
442 	}
443 	/* must call before i915->graphics_version is used anywhere else */
444 	i915_info_from_device_id(i915);
445 
446 	i915_get_modifier_order(i915);
447 
448 	memset(&get_param, 0, sizeof(get_param));
449 	get_param.param = I915_PARAM_HAS_LLC;
450 	get_param.value = &i915->has_llc;
451 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
452 	if (ret) {
453 		drv_loge("Failed to get I915_PARAM_HAS_LLC\n");
454 		free(i915);
455 		return -EINVAL;
456 	}
457 
458 	memset(&get_param, 0, sizeof(get_param));
459 	get_param.param = I915_PARAM_NUM_FENCES_AVAIL;
460 	get_param.value = &i915->num_fences_avail;
461 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
462 	if (ret) {
463 		drv_loge("Failed to get I915_PARAM_NUM_FENCES_AVAIL\n");
464 		free(i915);
465 		return -EINVAL;
466 	}
467 
468 	memset(&get_param, 0, sizeof(get_param));
469 	get_param.param = I915_PARAM_MMAP_GTT_VERSION;
470 	get_param.value = &val;
471 
472 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
473 	if (ret) {
474 		drv_loge("Failed to get I915_PARAM_MMAP_GTT_VERSION\n");
475 		free(i915);
476 		return -EINVAL;
477 	}
478 	i915->has_mmap_offset = (val >= 4);
479 
480 	if (i915->graphics_version >= 12)
481 		i915->has_hw_protection = 1;
482 
483 	drv->priv = i915;
484 	return i915_add_combinations(drv);
485 }
486 
487 /*
488  * Returns true if the height of a buffer of the given format should be aligned
489  * to the largest coded unit (LCU) assuming that it will be used for video. This
490  * is based on gmmlib's GmmIsYUVFormatLCUAligned().
491  */
i915_format_needs_LCU_alignment(uint32_t format,size_t plane,const struct i915_device * i915)492 static bool i915_format_needs_LCU_alignment(uint32_t format, size_t plane,
493 					    const struct i915_device *i915)
494 {
495 	switch (format) {
496 	case DRM_FORMAT_NV12:
497 	case DRM_FORMAT_P010:
498 	case DRM_FORMAT_P016:
499 		return (i915->graphics_version == 11 || i915->graphics_version == 12) && plane == 1;
500 	}
501 	return false;
502 }
503 
i915_bo_from_format(struct bo * bo,uint32_t width,uint32_t height,uint32_t format)504 static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
505 {
506 	uint32_t offset;
507 	size_t plane;
508 	int ret, pagesize;
509 	struct i915_device *i915 = bo->drv->priv;
510 
511 	offset = 0;
512 	pagesize = getpagesize();
513 
514 	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
515 		uint32_t stride = drv_stride_from_format(format, width, plane);
516 		uint32_t plane_height = drv_height_from_format(format, height, plane);
517 
518 		if (bo->meta.tiling != I915_TILING_NONE)
519 			assert(IS_ALIGNED(offset, pagesize));
520 
521 		ret = i915_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
522 		if (ret)
523 			return ret;
524 
525 		if (i915_format_needs_LCU_alignment(format, plane, i915)) {
526 			/*
527 			 * Align the height of the V plane for certain formats to the
528 			 * largest coded unit (assuming that this BO may be used for video)
529 			 * to be consistent with gmmlib.
530 			 */
531 			plane_height = ALIGN(plane_height, 64);
532 		}
533 
534 		bo->meta.strides[plane] = stride;
535 		bo->meta.sizes[plane] = stride * plane_height;
536 		bo->meta.offsets[plane] = offset;
537 		offset += bo->meta.sizes[plane];
538 	}
539 
540 	bo->meta.total_size = ALIGN(offset, pagesize);
541 
542 	return 0;
543 }
544 
i915_num_planes_from_modifier(struct driver * drv,uint32_t format,uint64_t modifier)545 static size_t i915_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
546 {
547 	size_t num_planes = drv_num_planes_from_format(format);
548 	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
549 	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
550 	    modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {
551 		assert(num_planes == 1);
552 		return 2;
553 	}
554 	return num_planes;
555 }
556 
/*
 * gbm_fls(x): 1-based index of the most significant set bit of x, or 0 when
 * x == 0. Chooses the 64-bit clz builtin when x is an 8-byte type.
 */
#define gbm_fls(x)                                                                                 \
	((x) ? __builtin_choose_expr(sizeof(x) == 8, 64 - __builtin_clzll(x),                      \
				     32 - __builtin_clz(x))                                        \
	     : 0)

/* Rounds x up to the next power of two (returns 0 for x == 0). */
#define roundup_power_of_two(x) ((x) != 0 ? 1ULL << gbm_fls((x) - 1) : 0)
563 
/*
 * Selects a format modifier for the requested allocation and fills in
 * bo->meta: tiling, format_modifier, per-plane strides/sizes/offsets,
 * num_planes and total_size. Returns 0 on success, negative on error.
 */
static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				    uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	struct i915_device *i915 = bo->drv->priv;

	uint64_t modifier;
	if (modifiers) {
		/* Caller-supplied list: pick by this device's preference order. */
		modifier =
		    drv_pick_modifier(modifiers, count, i915->modifier.order, i915->modifier.count);
	} else {
		/* No list given: fall back to the advertised combination for this usage. */
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;
		modifier = combo->metadata.modifier;
	}

	/*
	 * For cursor buffer, add padding as needed to reach a known cursor-plane-supported
	 * buffer size, as reported by the cursor capability properties.
	 *
	 * If the requested dimensions exceed either of the reported capabilities, or if the
	 * capabilities couldn't be read, silently fallback by continuing without additional
	 * padding. The buffer can still be used normally, and be committed to non-cursor
	 * planes.
	 */
	if (use_flags & BO_USE_CURSOR) {
		uint64_t cursor_width = 0;
		uint64_t cursor_height = 0;
		bool err = drmGetCap(bo->drv->fd, DRM_CAP_CURSOR_WIDTH, &cursor_width) ||
			      drmGetCap(bo->drv->fd, DRM_CAP_CURSOR_HEIGHT, &cursor_height);

		if (!err && width <= cursor_width && height <= cursor_height) {
			width = cursor_width;
			height = cursor_height;
		}
	}

	/*
	 * i915 only supports linear/x-tiled above 4096 wide on Gen9/Gen10 GPU.
	 * VAAPI decode in NV12 Y tiled format so skip modifier change for NV12/P010 huge bo.
	 */
	bool huge_bo = (i915->graphics_version < 11) && (width > 4096);
	if (huge_bo && format != DRM_FORMAT_NV12 && format != DRM_FORMAT_P010 &&
	    modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
		uint32_t i;
		/* Prefer X-tiled if the caller's list allows it, otherwise force linear. */
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_X_TILED;
	}

	/*
	 * Skip I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled.
	 * Pick the Y-tiled modifier if it has been passed in, otherwise use linear.
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	/* Prevent gen 8 and earlier from trying to use a tiling modifier */
	if (i915->graphics_version <= 8 && format == DRM_FORMAT_ARGB8888) {
		modifier = DRM_FORMAT_MOD_LINEAR;
	}

	/* Translate the chosen modifier into the legacy tiling mode. */
	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = I915_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = I915_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now support only I915_TILING_Y as this works with all
	 * IPs(render/media/display)
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = I915_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
	case I915_FORMAT_MOD_4_TILED_MTL_RC_CCS:
		bo->meta.tiling = I915_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		return drv_bo_from_format(bo, stride, 1, height, format);
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface.  Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		assert(drv_num_planes_from_format(format) > 0);
		uint32_t offset = 0;
		size_t plane = 0;
		size_t a_plane = 0;
		/*
		 * considering only 128 byte compression and one cache line of
		 * aux buffer(64B) contains compression status of 4-Y tiles.
		 * Which is 4 * (128B * 32L).
		 * line stride(bytes) is 4 * 128B
		 * and tile stride(lines) is 32L
		 */
		for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
			uint32_t stride = ALIGN(drv_stride_from_format(format, width, plane), 512);

			const uint32_t plane_height = drv_height_from_format(format, height, plane);
			uint32_t aligned_height = ALIGN(plane_height, 32);

			if (i915->is_xelpd && (stride > 1)) {
				/* XeLPD: round the stride to a power of two, taller tile rows. */
				stride = 1 << (32 - __builtin_clz(stride - 1));
				aligned_height = ALIGN(plane_height, 128);
			}

			bo->meta.strides[plane] = stride;
			/* size calculation & alignment are 64KB aligned
			 * size as per spec
			 */
			bo->meta.sizes[plane] = ALIGN(stride * aligned_height, 512 * 128);
			bo->meta.offsets[plane] = offset;
			/* next buffer offset */
			offset += bo->meta.sizes[plane];
		}

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		for (a_plane = 0; a_plane < plane; a_plane++) {
			/* Every 64 bytes in the aux plane contain compression information for a
			 * sub-row of 4 Y tiles of the corresponding main plane, so the pitch in
			 * bytes of the aux plane should be the pitch of the main plane in units of
			 * 4 tiles multiplied by 64 (or equivalently, the pitch of the main plane in
			 * bytes divided by 8).
			 */
			bo->meta.strides[plane + a_plane] = bo->meta.strides[a_plane] / 8;
			/* Aligned to page size */
			bo->meta.sizes[plane + a_plane] =
			    ALIGN(bo->meta.sizes[a_plane] / 256, 4 * 1024);
			bo->meta.offsets[plane + a_plane] = offset;

			/* next buffer offset */
			offset += bo->meta.sizes[plane + a_plane];
		}
		/* Total number of planes & sizes */
		bo->meta.num_planes = plane + a_plane;
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {
		assert(drv_num_planes_from_format(format) > 0);
		uint32_t offset = 0, stride = 0;
		size_t plane = 0;
		size_t a_plane = 0;
		for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
			uint32_t alignment = 0, val, tmpoffset = 0;
			/*
			 * tile_align = 4 (for width) for CCS
			 */
			stride = ALIGN(drv_stride_from_format(format, width, plane), 512);
			height = ALIGN(drv_height_from_format(format, height, plane), 32);
			bo->meta.strides[plane] = stride;

			/* MTL needs 1MB Alignment */
			bo->meta.sizes[plane] = ALIGN(stride * height, 0x100000);
			if (plane == 1 &&
			    (format == DRM_FORMAT_NV12 || format == DRM_FORMAT_P010)) {
				/* Chroma plane: 1MB-align the offset, then bump it to at
				 * least stride * next_pow2(stride) if that is larger.
				 */
				alignment = 1 << 20;
				offset += alignment - (offset % alignment);
				tmpoffset = offset;
				val = roundup_power_of_two(stride);
				if ((stride * val) > tmpoffset)
					offset = stride * val;
			}

			bo->meta.offsets[plane] = offset;
			offset += bo->meta.sizes[plane];
		}

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		for (a_plane = 0; a_plane < plane; a_plane++) {
			/* Aux pitch is the main plane pitch divided by 8. */
			stride = bo->meta.strides[a_plane] / 8;
			bo->meta.strides[a_plane + plane] = stride;

			/* Aligned to page size */
			bo->meta.sizes[a_plane + plane] =
			    ALIGN(bo->meta.sizes[a_plane] / 256, getpagesize());
			bo->meta.offsets[a_plane + plane] = offset;
			/* next buffer offset */
			offset += bo->meta.sizes[plane + a_plane];
		}

		bo->meta.num_planes = plane + a_plane;
		bo->meta.total_size = offset;
	} else {
		/* All remaining (non-CCS) modifiers share the generic plane layout. */
		return i915_bo_from_format(bo, width, height, format);
	}
	return 0;
}
822 
i915_bo_create_from_metadata(struct bo * bo)823 static int i915_bo_create_from_metadata(struct bo *bo)
824 {
825 	int ret;
826 	uint32_t gem_handle;
827 	struct drm_i915_gem_set_tiling gem_set_tiling = { 0 };
828 	struct i915_device *i915 = bo->drv->priv;
829 
830 	if (i915->has_hw_protection && (bo->meta.use_flags & BO_USE_PROTECTED)) {
831 		struct drm_i915_gem_create_ext_protected_content protected_content = {
832 			.base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
833 			.flags = 0,
834 		};
835 
836 		struct drm_i915_gem_create_ext create_ext = {
837 			.size = bo->meta.total_size,
838 			.extensions = (uintptr_t)&protected_content,
839 		};
840 
841 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
842 		if (ret) {
843 			drv_loge("DRM_IOCTL_I915_GEM_CREATE_EXT failed (size=%llu) (ret=%d) \n",
844 				 create_ext.size, ret);
845 			return -errno;
846 		}
847 
848 		gem_handle = create_ext.handle;
849 	} else {
850 		struct drm_i915_gem_create gem_create = { 0 };
851 		gem_create.size = bo->meta.total_size;
852 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
853 		if (ret) {
854 			drv_loge("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
855 			return -errno;
856 		}
857 
858 		gem_handle = gem_create.handle;
859 	}
860 
861 	bo->handle.u32 = gem_handle;
862 
863 	/* Set/Get tiling ioctl not supported  based on fence availability
864 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
865 	 */
866 	if (i915->num_fences_avail) {
867 		gem_set_tiling.handle = bo->handle.u32;
868 		gem_set_tiling.tiling_mode = bo->meta.tiling;
869 		gem_set_tiling.stride = bo->meta.strides[0];
870 
871 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
872 		if (ret) {
873 			struct drm_gem_close gem_close = { 0 };
874 			gem_close.handle = bo->handle.u32;
875 			drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
876 
877 			drv_loge("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
878 			return -errno;
879 		}
880 	}
881 
882 	bo->meta.cached = (i915->has_llc || i915->is_mtl) && !(bo->meta.use_flags & BO_USE_SCANOUT);
883 
884 	return 0;
885 }
886 
i915_close(struct driver * drv)887 static void i915_close(struct driver *drv)
888 {
889 	free(drv->priv);
890 	drv->priv = NULL;
891 }
892 
i915_bo_import(struct bo * bo,struct drv_import_fd_data * data)893 static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
894 {
895 	int ret;
896 	struct drm_i915_gem_get_tiling gem_get_tiling = { 0 };
897 	struct i915_device *i915 = bo->drv->priv;
898 
899 	bo->meta.num_planes =
900 	    i915_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);
901 
902 	ret = drv_prime_bo_import(bo, data);
903 	if (ret)
904 		return ret;
905 
906 	/* Set/Get tiling ioctl not supported  based on fence availability
907 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
908 	 */
909 	if (i915->num_fences_avail) {
910 		/* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
911 		gem_get_tiling.handle = bo->handle.u32;
912 
913 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
914 		if (ret) {
915 			drv_gem_bo_destroy(bo);
916 			drv_loge("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
917 			return ret;
918 		}
919 		bo->meta.tiling = gem_get_tiling.tiling_mode;
920 	}
921 	return 0;
922 }
923 
use_write_combining(struct bo * bo)924 static bool use_write_combining(struct bo *bo)
925 {
926 	/* TODO(b/118799155): We don't seem to have a good way to
927 	 * detect the use cases for which WC mapping is really needed.
928 	 * The current heuristic seems overly coarse and may be slowing
929 	 * down some other use cases unnecessarily.
930 	 *
931 	 * For now, care must be taken not to use WC mappings for
932 	 * Renderscript and camera use cases, as they're
933 	 * performance-sensitive. */
934 	return (bo->meta.use_flags & BO_USE_SCANOUT) &&
935 	       !(bo->meta.use_flags &
936 		 (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE));
937 }
938 
i915_bo_map(struct bo * bo,struct vma * vma,uint32_t map_flags)939 static void *i915_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
940 {
941 	int ret;
942 	void *addr = MAP_FAILED;
943 	struct i915_device *i915 = bo->drv->priv;
944 
945 	if ((bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_CCS) ||
946 	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) ||
947 	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED) ||
948 	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS))
949 		return MAP_FAILED;
950 
951 	if (bo->meta.tiling == I915_TILING_NONE) {
952 		if (i915->has_mmap_offset) {
953 			struct drm_i915_gem_mmap_offset gem_map = { 0 };
954 			gem_map.handle = bo->handle.u32;
955 			gem_map.flags = I915_MMAP_OFFSET_WB;
956 
957 			if (use_write_combining(bo))
958 				gem_map.flags = I915_MMAP_OFFSET_WC;
959 
960 			/* Get the fake offset back */
961 			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_map);
962 			if (ret == 0)
963 				addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags),
964 					    MAP_SHARED, bo->drv->fd, gem_map.offset);
965 		} else {
966 			struct drm_i915_gem_mmap gem_map = { 0 };
967 			if (use_write_combining(bo))
968 				gem_map.flags = I915_MMAP_WC;
969 
970 			gem_map.handle = bo->handle.u32;
971 			gem_map.offset = 0;
972 			gem_map.size = bo->meta.total_size;
973 
974 			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
975 			/* DRM_IOCTL_I915_GEM_MMAP mmaps the underlying shm
976 			 * file and returns a user space address directly, ie,
977 			 * doesn't go through mmap. If we try that on a
978 			 * dma-buf that doesn't have a shm file, i915.ko
979 			 * returns ENXIO.  Fall through to
980 			 * DRM_IOCTL_I915_GEM_MMAP_GTT in that case, which
981 			 * will mmap on the drm fd instead. */
982 			if (ret == 0)
983 				addr = (void *)(uintptr_t)gem_map.addr_ptr;
984 		}
985 	}
986 
987 	if (addr == MAP_FAILED) {
988 		struct drm_i915_gem_mmap_gtt gem_map = { 0 };
989 
990 		gem_map.handle = bo->handle.u32;
991 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
992 		if (ret) {
993 			drv_loge("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
994 			return MAP_FAILED;
995 		}
996 
997 		addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
998 			    bo->drv->fd, gem_map.offset);
999 	}
1000 
1001 	if (addr == MAP_FAILED) {
1002 		drv_loge("i915 GEM mmap failed\n");
1003 		return addr;
1004 	}
1005 
1006 	vma->length = bo->meta.total_size;
1007 	return addr;
1008 }
1009 
i915_bo_invalidate(struct bo * bo,struct mapping * mapping)1010 static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
1011 {
1012 	int ret;
1013 	struct drm_i915_gem_set_domain set_domain = { 0 };
1014 
1015 	set_domain.handle = bo->handle.u32;
1016 	if (bo->meta.tiling == I915_TILING_NONE) {
1017 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1018 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1019 			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1020 	} else {
1021 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1022 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1023 			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1024 	}
1025 
1026 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
1027 	if (ret) {
1028 		drv_loge("DRM_IOCTL_I915_GEM_SET_DOMAIN with %d\n", ret);
1029 		return ret;
1030 	}
1031 
1032 	return 0;
1033 }
1034 
i915_bo_flush(struct bo * bo,struct mapping * mapping)1035 static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
1036 {
1037 	struct i915_device *i915 = bo->drv->priv;
1038 	if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
1039 		i915_clflush(mapping->vma->addr, mapping->vma->length);
1040 
1041 	return 0;
1042 }
1043 
/* minigbm backend vtable for Intel i915 GPUs. Entry points not implemented
 * here fall back to the generic drv_* helpers. */
const struct backend backend_i915 = {
	.name = "i915",
	.init = i915_init,
	.close = i915_close,
	.bo_compute_metadata = i915_bo_compute_metadata,
	.bo_create_from_metadata = i915_bo_create_from_metadata,
	/* Generic GEM-close destroy; no i915-specific teardown is needed. */
	.bo_destroy = drv_gem_bo_destroy,
	.bo_import = i915_bo_import,
	.bo_map = i915_bo_map,
	.bo_unmap = drv_bo_munmap,
	.bo_invalidate = i915_bo_invalidate,
	.bo_flush = i915_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
	.num_planes_from_modifier = i915_num_planes_from_modifier,
};
1059 
1060 #endif
1061