• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifdef DRV_I915
8 
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xf86drm.h>

#include "drv_helpers.h"
#include "drv_priv.h"
#include "external/i915_drm.h"
#include "util.h"
22 
23 #define I915_CACHELINE_SIZE 64
24 #define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)
25 
/* Formats supported for both display scanout and rendering. */
static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
						   DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
						   DRM_FORMAT_RGB565,	   DRM_FORMAT_XBGR2101010,
						   DRM_FORMAT_XBGR8888,	   DRM_FORMAT_XRGB2101010,
						   DRM_FORMAT_XRGB8888 };

/* Formats supported for rendering only (no scanout). */
static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };

/* Formats only usable as textures (sampling sources), not render targets. */
static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
						 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };

/*
 * Per-platform modifier candidate lists, consumed by drv_pick_modifier().
 * NOTE(review): ordering appears to be most-preferred first — confirm against
 * drv_pick_modifier() semantics.
 */
static const uint64_t gen_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Y_TILED,
					       I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR };

static const uint64_t gen12_modifier_order[] = {
	I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS, I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS,
	I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR
};

static const uint64_t gen11_modifier_order[] = { I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

/* Xe-LPD+ display (Meteor Lake): tile-4 based modifiers. */
static const uint64_t xe_lpdp_modifier_order[] = { I915_FORMAT_MOD_4_TILED_MTL_RC_CCS,
						   I915_FORMAT_MOD_4_TILED, I915_FORMAT_MOD_X_TILED,
						   DRM_FORMAT_MOD_LINEAR };
51 
/* Modifier candidate list chosen for the running platform
 * (filled in by i915_get_modifier_order()). */
struct modifier_support_t {
	const uint64_t *order; /* points at one of the *_modifier_order tables */
	uint32_t count;	       /* number of entries in |order| */
};

/* Per-driver private state, allocated in i915_init() and stored in drv->priv. */
struct i915_device {
	uint32_t graphics_version; /* 3..12, derived from the PCI device ID */
	int32_t has_llc;	   /* I915_PARAM_HAS_LLC */
	int32_t has_hw_protection; /* set for graphics version >= 12 */
	struct modifier_support_t modifier;
	int device_id; /* PCI device ID (I915_PARAM_CHIPSET_ID) */
	bool is_xelpd; /* ADL-P / RPL-P platforms */
	/*TODO : cleanup is_mtl to avoid adding variables for every new platforms */
	bool is_mtl; /* Meteor Lake */
	int32_t num_fences_avail; /* I915_PARAM_NUM_FENCES_AVAIL */
	bool has_mmap_offset;	  /* mmap_gtt version >= 4 supports mmap_offset */
};
69 
/*
 * Derives platform feature flags from the PCI device ID (i915->device_id):
 * sets i915->graphics_version (defaulting to 4 when the ID is not in any
 * table), plus is_xelpd for ADL-P/RPL-P and is_mtl for Meteor Lake.
 * Must run before graphics_version is consumed anywhere else (see i915_init).
 */
static void i915_info_from_device_id(struct i915_device *i915)
{
	/* Gen 3 */
	const uint16_t gen3_ids[] = { 0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE,
				      0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011 };
	const uint16_t gen4_ids[] = { 0x29A2, 0x2992, 0x2982, 0x2972, 0x2A02, 0x2A12, 0x2A42,
				      0x2E02, 0x2E12, 0x2E22, 0x2E32, 0x2E42, 0x2E92 };
	const uint16_t gen5_ids[] = { 0x0042, 0x0046 };
	const uint16_t gen6_ids[] = { 0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010A };
	const uint16_t gen7_ids[] = {
		0x0152, 0x0162, 0x0156, 0x0166, 0x015a, 0x016a, 0x0402, 0x0412, 0x0422,
		0x0406, 0x0416, 0x0426, 0x040A, 0x041A, 0x042A, 0x040B, 0x041B, 0x042B,
		0x040E, 0x041E, 0x042E, 0x0C02, 0x0C12, 0x0C22, 0x0C06, 0x0C16, 0x0C26,
		0x0C0A, 0x0C1A, 0x0C2A, 0x0C0B, 0x0C1B, 0x0C2B, 0x0C0E, 0x0C1E, 0x0C2E,
		0x0A02, 0x0A12, 0x0A22, 0x0A06, 0x0A16, 0x0A26, 0x0A0A, 0x0A1A, 0x0A2A,
		0x0A0B, 0x0A1B, 0x0A2B, 0x0A0E, 0x0A1E, 0x0A2E, 0x0D02, 0x0D12, 0x0D22,
		0x0D06, 0x0D16, 0x0D26, 0x0D0A, 0x0D1A, 0x0D2A, 0x0D0B, 0x0D1B, 0x0D2B,
		0x0D0E, 0x0D1E, 0x0D2E, 0x0F31, 0x0F32, 0x0F33, 0x0157, 0x0155
	};
	const uint16_t gen8_ids[] = { 0x22B0, 0x22B1, 0x22B2, 0x22B3, 0x1602, 0x1606,
				      0x160A, 0x160B, 0x160D, 0x160E, 0x1612, 0x1616,
				      0x161A, 0x161B, 0x161D, 0x161E, 0x1622, 0x1626,
				      0x162A, 0x162B, 0x162D, 0x162E };
	const uint16_t gen9_ids[] = {
		0x1902, 0x1906, 0x190A, 0x190B, 0x190E, 0x1912, 0x1913, 0x1915, 0x1916, 0x1917,
		0x191A, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, 0x1926, 0x1927, 0x192A, 0x192B,
		0x192D, 0x1932, 0x193A, 0x193B, 0x193D, 0x0A84, 0x1A84, 0x1A85, 0x5A84, 0x5A85,
		0x3184, 0x3185, 0x5902, 0x5906, 0x590A, 0x5908, 0x590B, 0x590E, 0x5913, 0x5915,
		0x5917, 0x5912, 0x5916, 0x591A, 0x591B, 0x591D, 0x591E, 0x5921, 0x5923, 0x5926,
		0x5927, 0x593B, 0x591C, 0x87C0, 0x87CA, 0x3E90, 0x3E93, 0x3E99, 0x3E9C, 0x3E91,
		0x3E92, 0x3E96, 0x3E98, 0x3E9A, 0x3E9B, 0x3E94, 0x3EA9, 0x3EA5, 0x3EA6, 0x3EA7,
		0x3EA8, 0x3EA1, 0x3EA4, 0x3EA0, 0x3EA3, 0x3EA2, 0x9B21, 0x9BA0, 0x9BA2, 0x9BA4,
		0x9BA5, 0x9BA8, 0x9BAA, 0x9BAB, 0x9BAC, 0x9B41, 0x9BC0, 0x9BC2, 0x9BC4, 0x9BC5,
		0x9BC6, 0x9BC8, 0x9BCA, 0x9BCB, 0x9BCC, 0x9BE6, 0x9BF6
	};
	const uint16_t gen11_ids[] = { 0x8A50, 0x8A51, 0x8A52, 0x8A53, 0x8A54, 0x8A56, 0x8A57,
				       0x8A58, 0x8A59, 0x8A5A, 0x8A5B, 0x8A5C, 0x8A5D, 0x8A71,
				       0x4500, 0x4541, 0x4551, 0x4555, 0x4557, 0x4571, 0x4E51,
				       0x4E55, 0x4E57, 0x4E61, 0x4E71 };
	const uint16_t gen12_ids[] = {
		0x4c8a, 0x4c8b, 0x4c8c, 0x4c90, 0x4c9a, 0x4680, 0x4681, 0x4682, 0x4683, 0x4688,
		0x4689, 0x4690, 0x4691, 0x4692, 0x4693, 0x4698, 0x4699, 0x4626, 0x4628, 0x462a,
		0x46a0, 0x46a1, 0x46a2, 0x46a3, 0x46a6, 0x46a8, 0x46aa, 0x46b0, 0x46b1, 0x46b2,
		0x46b3, 0x46c0, 0x46c1, 0x46c2, 0x46c3, 0x9A40, 0x9A49, 0x9A59, 0x9A60, 0x9A68,
		0x9A70, 0x9A78, 0x9AC0, 0x9AC9, 0x9AD9, 0x9AF8, 0x4905, 0x4906, 0x4907, 0x4908
	};
	/* Alder Lake-P (Xe-LPD display) — note these overlap gen12_ids. */
	const uint16_t adlp_ids[] = { 0x46A0, 0x46A1, 0x46A2, 0x46A3, 0x46A6, 0x46A8, 0x46AA,
				      0x462A, 0x4626, 0x4628, 0x46B0, 0x46B1, 0x46B2, 0x46B3,
				      0x46C0, 0x46C1, 0x46C2, 0x46C3, 0x46D0, 0x46D1, 0x46D2,
				      0x46D3, 0x46D4 };

	/* Raptor Lake-P — treated like ADL-P (is_xelpd). */
	const uint16_t rplp_ids[] = { 0xA720, 0xA721, 0xA7A0, 0xA7A1, 0xA7A8, 0xA7A9, 0xA7AA, 0xA7AB, 0xA7AC, 0xA7AD };

	/* Meteor Lake. */
	const uint16_t mtl_ids[] = { 0x7D40, 0x7D60, 0x7D45, 0x7D55, 0x7DD5 };

	unsigned i;
	/* Unknown device IDs fall back to graphics version 4. */
	i915->graphics_version = 4;
	i915->is_xelpd = false;
	i915->is_mtl = false;

	/* Gen 3 */
	for (i = 0; i < ARRAY_SIZE(gen3_ids); i++)
		if (gen3_ids[i] == i915->device_id)
			i915->graphics_version = 3;

	/* Gen 4 */
	for (i = 0; i < ARRAY_SIZE(gen4_ids); i++)
		if (gen4_ids[i] == i915->device_id)
			i915->graphics_version = 4;

	/* Gen 5 */
	for (i = 0; i < ARRAY_SIZE(gen5_ids); i++)
		if (gen5_ids[i] == i915->device_id)
			i915->graphics_version = 5;

	/* Gen 6 */
	for (i = 0; i < ARRAY_SIZE(gen6_ids); i++)
		if (gen6_ids[i] == i915->device_id)
			i915->graphics_version = 6;

	/* Gen 7 */
	for (i = 0; i < ARRAY_SIZE(gen7_ids); i++)
		if (gen7_ids[i] == i915->device_id)
			i915->graphics_version = 7;

	/* Gen 8 */
	for (i = 0; i < ARRAY_SIZE(gen8_ids); i++)
		if (gen8_ids[i] == i915->device_id)
			i915->graphics_version = 8;

	/* Gen 9 */
	for (i = 0; i < ARRAY_SIZE(gen9_ids); i++)
		if (gen9_ids[i] == i915->device_id)
			i915->graphics_version = 9;

	/* Gen 11 */
	for (i = 0; i < ARRAY_SIZE(gen11_ids); i++)
		if (gen11_ids[i] == i915->device_id)
			i915->graphics_version = 11;

	/* Gen 12 */
	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++)
		if (gen12_ids[i] == i915->device_id)
			i915->graphics_version = 12;

	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++)
		if (adlp_ids[i] == i915->device_id) {
			i915->is_xelpd = true;
			i915->graphics_version = 12;
		}

	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++)
		if (rplp_ids[i] == i915->device_id) {
			i915->is_xelpd = true;
			i915->graphics_version = 12;
		}

	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++)
		if (mtl_ids[i] == i915->device_id) {
			i915->graphics_version = 12;
			i915->is_mtl = true;
		}
}
191 
i915_get_modifier_order(struct i915_device * i915)192 static void i915_get_modifier_order(struct i915_device *i915)
193 {
194 	if (i915->is_mtl) {
195 		i915->modifier.order = xe_lpdp_modifier_order;
196 		i915->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
197 	} else if (i915->graphics_version == 12) {
198 		/*
199 		 * On ADL platforms of gen 12 onwards, Intel media compression is supported for
200 		 * video decoding on Chrome.
201 		 */
202 		i915->modifier.order = gen12_modifier_order;
203 		i915->modifier.count = ARRAY_SIZE(gen12_modifier_order);
204 	} else if (i915->graphics_version == 11) {
205 		i915->modifier.order = gen11_modifier_order;
206 		i915->modifier.count = ARRAY_SIZE(gen11_modifier_order);
207 	} else {
208 		i915->modifier.order = gen_modifier_order;
209 		i915->modifier.count = ARRAY_SIZE(gen_modifier_order);
210 	}
211 }
212 
/* Returns |current_flags| with every bit set in |mask| cleared. */
static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	return current_flags & ~mask;
}
218 
/*
 * Registers all supported (format, modifier, usage) combinations with the
 * driver core: linear for everything, X-tiled for render/scanout formats, and
 * either tile-4 (Meteor Lake) or Y-tiled (everything else) for the rest.
 * Returns 0 on success.
 */
static int i915_add_combinations(struct driver *drv)
{
	struct i915_device *i915 = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    i915->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	/* Usages that imply CPU access and therefore require linear layout. */
	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = I915_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* P010 linear can be used for scanout too. */
	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata_linear, BO_USE_SCANOUT);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	/* Tiled layouts: strip the CPU-access usages that require linear. */
	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;

	struct format_metadata metadata_x_tiled = { .tiling = I915_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	if (i915->is_mtl) {
		struct format_metadata metadata_4_tiled = { .tiling = I915_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };
/* Support tile4 NV12 and P010 for libva */
#ifdef I915_SCANOUT_4_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected | BO_USE_SCANOUT;
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     scanout_and_render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = I915_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };

/* Support y-tiled NV12 and P010 for libva */
#ifdef I915_SCANOUT_Y_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER |
					    hw_protected |
					    (i915->graphics_version >= 11 ? BO_USE_SCANOUT : 0);
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		/* Y-tiled scanout isn't available on old platforms so we add
		 * |scanout_render_formats| without that USE flag.
		 */
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     render_not_linear);
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
	}
	return 0;
}
333 
i915_align_dimensions(struct bo * bo,uint32_t format,uint32_t tiling,uint32_t * stride,uint32_t * aligned_height)334 static int i915_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
335 				 uint32_t *aligned_height)
336 {
337 	struct i915_device *i915 = bo->drv->priv;
338 	uint32_t horizontal_alignment;
339 	uint32_t vertical_alignment;
340 
341 	switch (tiling) {
342 	default:
343 	case I915_TILING_NONE:
344 		/*
345 		 * The Intel GPU doesn't need any alignment in linear mode,
346 		 * but libva requires the allocation stride to be aligned to
347 		 * 16 bytes and height to 4 rows. Further, we round up the
348 		 * horizontal alignment so that row start on a cache line (64
349 		 * bytes).
350 		 */
351 #ifdef LINEAR_ALIGN_256
352 		/*
353 		 * If we want to import these buffers to amdgpu they need to
354 		 * their match LINEAR_ALIGNED requirement of 256 byte alignement.
355 		 */
356 		horizontal_alignment = 256;
357 #else
358 		horizontal_alignment = 64;
359 #endif
360 
361 		/*
362 		 * For hardware video encoding buffers, we want to align to the size of a
363 		 * macroblock, because otherwise we will end up encoding uninitialized data.
364 		 * This can result in substantial quality degradations, especially on lower
365 		 * resolution videos, because this uninitialized data may be high entropy.
366 		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
367 		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
368 		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
369 		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
370 		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
371 		 * will cause the GPU to read out-of-bounds.
372 		 *
373 		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
374 		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use
375 		 * virtgpu_cross_domain backend which passes that flag through.
376 		 */
377 		if (bo->meta.use_flags & BO_USE_HW_VIDEO_ENCODER) {
378 			vertical_alignment = 8;
379 		} else if (format == DRM_FORMAT_R8 && *aligned_height == 1) {
380 			vertical_alignment = 1;
381 		} else {
382 			vertical_alignment = 4;
383 		}
384 
385 		break;
386 
387 	case I915_TILING_X:
388 		horizontal_alignment = 512;
389 		vertical_alignment = 8;
390 		break;
391 
392 	case I915_TILING_Y:
393 	case I915_TILING_4:
394 		if (i915->graphics_version == 3) {
395 			horizontal_alignment = 512;
396 			vertical_alignment = 8;
397 		} else {
398 			horizontal_alignment = 128;
399 			vertical_alignment = 32;
400 		}
401 		break;
402 	}
403 
404 	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
405 	if (i915->graphics_version > 3) {
406 		*stride = ALIGN(*stride, horizontal_alignment);
407 	} else {
408 		while (*stride > horizontal_alignment)
409 			horizontal_alignment <<= 1;
410 
411 		*stride = horizontal_alignment;
412 	}
413 
414 	if (i915->graphics_version <= 3 && *stride > 8192)
415 		return -EINVAL;
416 
417 	return 0;
418 }
419 
i915_clflush(void * start,size_t size)420 static void i915_clflush(void *start, size_t size)
421 {
422 	void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
423 	void *end = (void *)((uintptr_t)start + size);
424 
425 	__builtin_ia32_mfence();
426 	while (p < end) {
427 #if defined(__CLFLUSHOPT__)
428 		__builtin_ia32_clflushopt(p);
429 #else
430 		__builtin_ia32_clflush(p);
431 #endif
432 		p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
433 	}
434 	__builtin_ia32_mfence();
435 }
436 
i915_init(struct driver * drv)437 static int i915_init(struct driver *drv)
438 {
439 	int ret, val;
440 	struct i915_device *i915;
441 	drm_i915_getparam_t get_param = { 0 };
442 
443 	i915 = calloc(1, sizeof(*i915));
444 	if (!i915)
445 		return -ENOMEM;
446 
447 	get_param.param = I915_PARAM_CHIPSET_ID;
448 	get_param.value = &(i915->device_id);
449 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
450 	if (ret) {
451 		drv_loge("Failed to get I915_PARAM_CHIPSET_ID\n");
452 		free(i915);
453 		return -EINVAL;
454 	}
455 	/* must call before i915->graphics_version is used anywhere else */
456 	i915_info_from_device_id(i915);
457 
458 	i915_get_modifier_order(i915);
459 
460 	memset(&get_param, 0, sizeof(get_param));
461 	get_param.param = I915_PARAM_HAS_LLC;
462 	get_param.value = &i915->has_llc;
463 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
464 	if (ret) {
465 		drv_loge("Failed to get I915_PARAM_HAS_LLC\n");
466 		free(i915);
467 		return -EINVAL;
468 	}
469 
470 	memset(&get_param, 0, sizeof(get_param));
471 	get_param.param = I915_PARAM_NUM_FENCES_AVAIL;
472 	get_param.value = &i915->num_fences_avail;
473 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
474 	if (ret) {
475 		drv_loge("Failed to get I915_PARAM_NUM_FENCES_AVAIL\n");
476 		free(i915);
477 		return -EINVAL;
478 	}
479 
480 	memset(&get_param, 0, sizeof(get_param));
481 	get_param.param = I915_PARAM_MMAP_GTT_VERSION;
482 	get_param.value = &val;
483 
484 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
485 	if (ret) {
486 		drv_loge("Failed to get I915_PARAM_MMAP_GTT_VERSION\n");
487 		free(i915);
488 		return -EINVAL;
489 	}
490 	i915->has_mmap_offset = (val >= 4);
491 
492 	if (i915->graphics_version >= 12)
493 		i915->has_hw_protection = 1;
494 
495 	drv->priv = i915;
496 	return i915_add_combinations(drv);
497 }
498 
499 /*
500  * Returns true if the height of a buffer of the given format should be aligned
501  * to the largest coded unit (LCU) assuming that it will be used for video. This
502  * is based on gmmlib's GmmIsYUVFormatLCUAligned().
503  */
i915_format_needs_LCU_alignment(uint32_t format,size_t plane,const struct i915_device * i915)504 static bool i915_format_needs_LCU_alignment(uint32_t format, size_t plane,
505 					    const struct i915_device *i915)
506 {
507 	switch (format) {
508 	case DRM_FORMAT_NV12:
509 	case DRM_FORMAT_P010:
510 	case DRM_FORMAT_P016:
511 		return (i915->graphics_version == 11 || i915->graphics_version == 12) && plane == 1;
512 	}
513 	return false;
514 }
515 
i915_bo_from_format(struct bo * bo,uint32_t width,uint32_t height,uint32_t format)516 static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
517 {
518 	uint32_t offset;
519 	size_t plane;
520 	int ret, pagesize;
521 	struct i915_device *i915 = bo->drv->priv;
522 
523 	offset = 0;
524 	pagesize = getpagesize();
525 
526 	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
527 		uint32_t stride = drv_stride_from_format(format, width, plane);
528 		uint32_t plane_height = drv_height_from_format(format, height, plane);
529 
530 		if (bo->meta.tiling != I915_TILING_NONE)
531 			assert(IS_ALIGNED(offset, pagesize));
532 
533 		ret = i915_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
534 		if (ret)
535 			return ret;
536 
537 		if (i915_format_needs_LCU_alignment(format, plane, i915)) {
538 			/*
539 			 * Align the height of the V plane for certain formats to the
540 			 * largest coded unit (assuming that this BO may be used for video)
541 			 * to be consistent with gmmlib.
542 			 */
543 			plane_height = ALIGN(plane_height, 64);
544 		}
545 
546 		bo->meta.strides[plane] = stride;
547 		bo->meta.sizes[plane] = stride * plane_height;
548 		bo->meta.offsets[plane] = offset;
549 		offset += bo->meta.sizes[plane];
550 	}
551 
552 	bo->meta.total_size = ALIGN(offset, pagesize);
553 
554 	return 0;
555 }
556 
i915_num_planes_from_modifier(struct driver * drv,uint32_t format,uint64_t modifier)557 static size_t i915_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
558 {
559 	size_t num_planes = drv_num_planes_from_format(format);
560 	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
561 	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
562 	    modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {
563 		assert(num_planes == 1);
564 		return 2;
565 	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) {
566 		assert(num_planes == 2);
567 		return 4;
568 	}
569 
570 	return num_planes;
571 }
572 
/*
 * Picks a format modifier for the buffer — either from the caller-supplied
 * |modifiers| list or from the registered combination matching |use_flags| —
 * then sets bo->meta.tiling, bo->meta.format_modifier and fills out the
 * per-plane strides/sizes/offsets, num_planes and total_size. CCS-compressed
 * modifiers get dedicated layout math; everything else falls through to
 * i915_bo_from_format(). Returns 0 on success or -EINVAL on failure.
 */
static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				    uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	uint64_t modifier;
	struct i915_device *i915 = bo->drv->priv;
	/* Pre-gen11 hardware restricts surfaces wider than 4096 (see below). */
	bool huge_bo = (i915->graphics_version < 11) && (width > 4096);

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, i915->modifier.order, i915->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;
		modifier = combo->metadata.modifier;
		/*
		 * Media compression modifiers should not be picked automatically by minigbm based
		 * on |use_flags|. Instead the client should request them explicitly through
		 * gbm_bo_create_with_modifiers().
		 */
		assert(modifier != I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS);
		/* TODO(b/323863689): Account for driver's bandwidth compression in minigbm for
		 * media compressed buffers. */
	}
	if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS &&
	    !(format == DRM_FORMAT_NV12 || format == DRM_FORMAT_P010)) {
		drv_loge("Media compression is only supported for NV12 and P010\n");
		return -EINVAL;
	}

	/*
	 * i915 only supports linear/x-tiled above 4096 wide on Gen9/Gen10 GPU.
	 * VAAPI decode in NV12 Y tiled format so skip modifier change for NV12/P010 huge bo.
	 */
	if (huge_bo && format != DRM_FORMAT_NV12 && format != DRM_FORMAT_P010 &&
	    modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
				break;
		}
		/* Prefer X-tiled if the caller allows it, otherwise fall back to linear. */
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_X_TILED;
	}

	/*
	 * Skip I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled
	 * Pick y tiled modifier if it has been passed in, otherwise use linear
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	/* Prevent gen 8 and earlier from trying to use a tiling modifier */
	if (i915->graphics_version <= 8 && format == DRM_FORMAT_ARGB8888) {
		modifier = DRM_FORMAT_MOD_LINEAR;
	}

	/* Map the chosen modifier onto a kernel tiling mode. */
	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = I915_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = I915_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now support only I915_TILING_Y as this works with all
	 * IPs(render/media/display)
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
	case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
		bo->meta.tiling = I915_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
	case I915_FORMAT_MOD_4_TILED_MTL_RC_CCS:
		bo->meta.tiling = I915_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		return drv_bo_from_format(bo, stride, 1, height, format);
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface.  Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
		   modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) {
		/*
		 * Media compression modifiers should only be possible via the
		 * gbm_bo_create_with_modifiers() path, i.e., the minigbm client needs to
		 * explicitly request it.
		 */
		assert(modifier != I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
		       use_flags == BO_USE_NONE);
		assert(modifier != I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
		       bo->meta.use_flags == BO_USE_NONE);
		assert(modifier != I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS ||
		       (!!modifiers && count > 0));
		assert(drv_num_planes_from_format(format) > 0);

		uint32_t offset = 0;
		size_t plane = 0;
		size_t a_plane = 0;
		/*
		 * considering only 128 byte compression and one cache line of
		 * aux buffer(64B) contains compression status of 4-Y tiles.
		 * Which is 4 * (128B * 32L).
		 * line stride(bytes) is 4 * 128B
		 * and tile stride(lines) is 32L
		 */
		for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
			uint32_t stride = ALIGN(drv_stride_from_format(format, width, plane), 512);

			const uint32_t plane_height = drv_height_from_format(format, height, plane);
			uint32_t aligned_height = ALIGN(plane_height, 32);

			/* Xe-LPD (ADL/RPL): stride rounded up to a power of two,
			 * height aligned to 128 lines. */
			if (i915->is_xelpd && (stride > 1)) {
				stride = 1 << (32 - __builtin_clz(stride - 1));
				aligned_height = ALIGN(plane_height, 128);
			}

			bo->meta.strides[plane] = stride;
			/* size calculation & alignment are 64KB aligned
			 * size as per spec
			 */
			bo->meta.sizes[plane] = ALIGN(stride * aligned_height, 512 * 128);
			bo->meta.offsets[plane] = offset;
			/* next buffer offset */
			offset += bo->meta.sizes[plane];
		}

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		for (a_plane = 0; a_plane < plane; a_plane++) {
			/* Every 64 bytes in the aux plane contain compression information for a
			 * sub-row of 4 Y tiles of the corresponding main plane, so the pitch in
			 * bytes of the aux plane should be the pitch of the main plane in units of
			 * 4 tiles multiplied by 64 (or equivalently, the pitch of the main plane in
			 * bytes divided by 8).
			 */
			bo->meta.strides[plane + a_plane] = bo->meta.strides[a_plane] / 8;
			/* Aligned to page size */
			bo->meta.sizes[plane + a_plane] =
			    ALIGN(bo->meta.sizes[a_plane] / 256, 4 * 1024);
			bo->meta.offsets[plane + a_plane] = offset;

			/* next buffer offset */
			offset += bo->meta.sizes[plane + a_plane];
		}
		/* Total number of planes & sizes */
		bo->meta.num_planes = plane + a_plane;
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {

		/*
		 * considering only 128 byte compression and one cache line of
		 * aux buffer(64B) contains compression status of 4-Y tiles.
		 * Which is 4 * (128B * 32L).
		 * line stride(bytes) is 4 * 128B
		 * and tile stride(lines) is 32L
		 */
		uint32_t stride = ALIGN(drv_stride_from_format(format, width, 0), 512);
		stride = ALIGN(stride, 256);

		height = ALIGN(drv_height_from_format(format, height, 0), 32);


		bo->meta.strides[0] = stride;
		/* size calculation and alignment are 64KB aligned
		 * size as per spec
		 */
		bo->meta.sizes[0] = ALIGN(stride * height, 65536);
		bo->meta.offsets[0] = 0;

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		bo->meta.strides[1] = bo->meta.strides[0] / 8;

		/* Aligned to page size */
		bo->meta.sizes[1] = ALIGN(bo->meta.sizes[0] / 256, getpagesize());
		bo->meta.offsets[1] = bo->meta.sizes[0];
		/* Total number of planes & sizes */
		bo->meta.num_planes = 2;
		bo->meta.total_size = bo->meta.sizes[0] + bo->meta.sizes[1];
	} else {
		/* All non-CCS layouts share the generic per-plane path. */
		return i915_bo_from_format(bo, width, height, format);
	}
	return 0;
}
823 
i915_bo_create_from_metadata(struct bo * bo)824 static int i915_bo_create_from_metadata(struct bo *bo)
825 {
826 	int ret;
827 	uint32_t gem_handle;
828 	struct drm_i915_gem_set_tiling gem_set_tiling = { 0 };
829 	struct i915_device *i915 = bo->drv->priv;
830 
831 	if (i915->has_hw_protection && (bo->meta.use_flags & BO_USE_PROTECTED)) {
832 		struct drm_i915_gem_create_ext_protected_content protected_content = {
833 			.base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
834 			.flags = 0,
835 		};
836 
837 		struct drm_i915_gem_create_ext create_ext = {
838 			.size = bo->meta.total_size,
839 			.extensions = (uintptr_t)&protected_content,
840 		};
841 
842 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
843 		if (ret) {
844 			drv_loge("DRM_IOCTL_I915_GEM_CREATE_EXT failed (size=%llu) (ret=%d) \n",
845 				 create_ext.size, ret);
846 			return -errno;
847 		}
848 
849 		gem_handle = create_ext.handle;
850 	} else {
851 		struct drm_i915_gem_create gem_create = { 0 };
852 		gem_create.size = bo->meta.total_size;
853 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
854 		if (ret) {
855 			drv_loge("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
856 			return -errno;
857 		}
858 
859 		gem_handle = gem_create.handle;
860 	}
861 
862 	bo->handle.u32 = gem_handle;
863 
864 	/* Set/Get tiling ioctl not supported  based on fence availability
865 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
866 	 */
867 	if (i915->num_fences_avail) {
868 		gem_set_tiling.handle = bo->handle.u32;
869 		gem_set_tiling.tiling_mode = bo->meta.tiling;
870 		gem_set_tiling.stride = bo->meta.strides[0];
871 
872 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
873 		if (ret) {
874 			struct drm_gem_close gem_close = { 0 };
875 			gem_close.handle = bo->handle.u32;
876 			drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
877 
878 			drv_loge("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
879 			return -errno;
880 		}
881 	}
882 
883 	bo->meta.cached = (i915->has_llc || i915->is_mtl) &&
884 			  !(bo->meta.use_flags & BO_USE_SCANOUT);
885 
886 	return 0;
887 }
888 
i915_close(struct driver * drv)889 static void i915_close(struct driver *drv)
890 {
891 	free(drv->priv);
892 	drv->priv = NULL;
893 }
894 
i915_bo_import(struct bo * bo,struct drv_import_fd_data * data)895 static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
896 {
897 	int ret;
898 	struct drm_i915_gem_get_tiling gem_get_tiling = { 0 };
899 	struct i915_device *i915 = bo->drv->priv;
900 
901 	bo->meta.num_planes =
902 	    i915_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);
903 
904 	ret = drv_prime_bo_import(bo, data);
905 	if (ret)
906 		return ret;
907 
908 	/* Set/Get tiling ioctl not supported  based on fence availability
909 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
910 	 */
911 	if (i915->num_fences_avail) {
912 		/* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
913 		gem_get_tiling.handle = bo->handle.u32;
914 
915 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
916 		if (ret) {
917 			drv_gem_bo_destroy(bo);
918 			drv_loge("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
919 			return ret;
920 		}
921 		bo->meta.tiling = gem_get_tiling.tiling_mode;
922 	}
923 	return 0;
924 }
925 
i915_bo_map(struct bo * bo,struct vma * vma,uint32_t map_flags)926 static void *i915_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
927 {
928 	int ret;
929 	void *addr = MAP_FAILED;
930 	struct i915_device *i915 = bo->drv->priv;
931 
932 	if ((bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_CCS) ||
933 	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) ||
934 	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) ||
935 	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED) ||
936 	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS))
937 		return MAP_FAILED;
938 
939 	if (bo->meta.tiling == I915_TILING_NONE) {
940 		if (i915->has_mmap_offset) {
941 			struct drm_i915_gem_mmap_offset gem_map = { 0 };
942 			gem_map.handle = bo->handle.u32;
943 			gem_map.flags = I915_MMAP_OFFSET_WB;
944 
945 			/* Get the fake offset back */
946 			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_map);
947 			if (ret == 0)
948 				addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags),
949 					    MAP_SHARED, bo->drv->fd, gem_map.offset);
950 		} else {
951 			struct drm_i915_gem_mmap gem_map = { 0 };
952 			/* TODO(b/118799155): We don't seem to have a good way to
953 			 * detect the use cases for which WC mapping is really needed.
954 			 * The current heuristic seems overly coarse and may be slowing
955 			 * down some other use cases unnecessarily.
956 			 *
957 			 * For now, care must be taken not to use WC mappings for
958 			 * Renderscript and camera use cases, as they're
959 			 * performance-sensitive. */
960 			if ((bo->meta.use_flags & BO_USE_SCANOUT) &&
961 			    !(bo->meta.use_flags &
962 			      (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
963 				gem_map.flags = I915_MMAP_WC;
964 
965 			gem_map.handle = bo->handle.u32;
966 			gem_map.offset = 0;
967 			gem_map.size = bo->meta.total_size;
968 
969 			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
970 			/* DRM_IOCTL_I915_GEM_MMAP mmaps the underlying shm
971 			 * file and returns a user space address directly, ie,
972 			 * doesn't go through mmap. If we try that on a
973 			 * dma-buf that doesn't have a shm file, i915.ko
974 			 * returns ENXIO.  Fall through to
975 			 * DRM_IOCTL_I915_GEM_MMAP_GTT in that case, which
976 			 * will mmap on the drm fd instead. */
977 			if (ret == 0)
978 				addr = (void *)(uintptr_t)gem_map.addr_ptr;
979 		}
980 	}
981 
982 	if (addr == MAP_FAILED) {
983 		struct drm_i915_gem_mmap_gtt gem_map = { 0 };
984 
985 		gem_map.handle = bo->handle.u32;
986 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
987 		if (ret) {
988 			drv_loge("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
989 			return MAP_FAILED;
990 		}
991 
992 		addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
993 			    bo->drv->fd, gem_map.offset);
994 	}
995 
996 	if (addr == MAP_FAILED) {
997 		drv_loge("i915 GEM mmap failed\n");
998 		return addr;
999 	}
1000 
1001 	vma->length = bo->meta.total_size;
1002 	return addr;
1003 }
1004 
i915_bo_invalidate(struct bo * bo,struct mapping * mapping)1005 static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
1006 {
1007 	int ret;
1008 	struct drm_i915_gem_set_domain set_domain = { 0 };
1009 
1010 	set_domain.handle = bo->handle.u32;
1011 	if (bo->meta.tiling == I915_TILING_NONE) {
1012 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1013 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1014 			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1015 	} else {
1016 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1017 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1018 			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1019 	}
1020 
1021 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
1022 	if (ret) {
1023 		drv_loge("DRM_IOCTL_I915_GEM_SET_DOMAIN with %d\n", ret);
1024 		return ret;
1025 	}
1026 
1027 	return 0;
1028 }
1029 
i915_bo_flush(struct bo * bo,struct mapping * mapping)1030 static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
1031 {
1032 	struct i915_device *i915 = bo->drv->priv;
1033 	if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
1034 		i915_clflush(mapping->vma->addr, mapping->vma->length);
1035 
1036 	return 0;
1037 }
1038 
/* Backend vtable registering the i915 driver with the minigbm core.
 * Generic helpers (drv_gem_bo_destroy, drv_bo_munmap, the resolve-format
 * helper) are used where no i915-specific handling is needed. */
const struct backend backend_i915 = {
	.name = "i915",
	.init = i915_init,
	.close = i915_close,
	.bo_compute_metadata = i915_bo_compute_metadata,
	.bo_create_from_metadata = i915_bo_create_from_metadata,
	.bo_destroy = drv_gem_bo_destroy,
	.bo_import = i915_bo_import,
	.bo_map = i915_bo_map,
	.bo_unmap = drv_bo_munmap,
	.bo_invalidate = i915_bo_invalidate,
	.bo_flush = i915_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
	.num_planes_from_modifier = i915_num_planes_from_modifier,
};
1054 
1055 #endif
1056