• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifdef DRV_I915
8 
9 #include <assert.h>
10 #include <errno.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <unistd.h>
16 #include <xf86drm.h>
17 
18 #include "drv_helpers.h"
19 #include "drv_priv.h"
20 #include "external/i915_drm.h"
21 #include "util.h"
22 
23 #define I915_CACHELINE_SIZE 64
24 #define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)
25 
/* Formats that support both display scanout and GPU rendering. */
static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
						   DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
						   DRM_FORMAT_RGB565,	   DRM_FORMAT_XBGR2101010,
						   DRM_FORMAT_XBGR8888,	   DRM_FORMAT_XRGB2101010,
						   DRM_FORMAT_XRGB8888 };

/* Render-only formats (no scanout combinations registered for these). */
static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };

/* Formats only used for texturing (camera / video / YUV planes). */
static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
						 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };

/*
 * Per-generation modifier preference lists, ordered most- to
 * least-preferred; consumed by drv_pick_modifier() via
 * i915_get_modifier_order().
 */
static const uint64_t gen_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Y_TILED,
					       I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR };

static const uint64_t gen12_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
						 I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

static const uint64_t gen11_modifier_order[] = { I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

static const uint64_t xe_lpdp_modifier_order[] = { I915_FORMAT_MOD_4_TILED, I915_FORMAT_MOD_X_TILED,
						   DRM_FORMAT_MOD_LINEAR };
49 
/* A modifier preference list: |count| entries, most-preferred first. */
struct modifier_support_t {
	const uint64_t *order;
	uint32_t count;
};

/* Per-driver i915 state, allocated in i915_init() and stored in drv->priv. */
struct i915_device {
	uint32_t graphics_version; /* GPU generation derived from the PCI device id */
	int32_t has_llc;	   /* I915_PARAM_HAS_LLC result */
	int32_t has_hw_protection; /* set for graphics_version >= 12; enables BO_USE_PROTECTED */
	struct modifier_support_t modifier; /* modifier order for this platform */
	int device_id;		   /* PCI device id (I915_PARAM_CHIPSET_ID) */
	bool is_xelpd;		   /* ADL-P / RPL-P platform */
	/*TODO : cleanup is_mtl to avoid adding variables for every new platforms */
	bool is_mtl;		   /* Meteor Lake platform */
	int32_t num_fences_avail;  /* I915_PARAM_NUM_FENCES_AVAIL; 0 disables set/get-tiling */
	bool has_mmap_offset;	   /* mmap_gtt version >= 4, i.e. MMAP_OFFSET ioctl available */
};
67 
i915_info_from_device_id(struct i915_device * i915)68 static void i915_info_from_device_id(struct i915_device *i915)
69 {
70 	const uint16_t gen3_ids[] = { 0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE,
71 				      0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011 };
72 	const uint16_t gen4_ids[] = { 0x29A2, 0x2992, 0x2982, 0x2972, 0x2A02, 0x2A12, 0x2A42,
73 				      0x2E02, 0x2E12, 0x2E22, 0x2E32, 0x2E42, 0x2E92 };
74 	const uint16_t gen5_ids[] = { 0x0042, 0x0046 };
75 	const uint16_t gen6_ids[] = { 0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010A };
76 	const uint16_t gen7_ids[] = {
77 		0x0152, 0x0162, 0x0156, 0x0166, 0x015a, 0x016a, 0x0402, 0x0412, 0x0422,
78 		0x0406, 0x0416, 0x0426, 0x040A, 0x041A, 0x042A, 0x040B, 0x041B, 0x042B,
79 		0x040E, 0x041E, 0x042E, 0x0C02, 0x0C12, 0x0C22, 0x0C06, 0x0C16, 0x0C26,
80 		0x0C0A, 0x0C1A, 0x0C2A, 0x0C0B, 0x0C1B, 0x0C2B, 0x0C0E, 0x0C1E, 0x0C2E,
81 		0x0A02, 0x0A12, 0x0A22, 0x0A06, 0x0A16, 0x0A26, 0x0A0A, 0x0A1A, 0x0A2A,
82 		0x0A0B, 0x0A1B, 0x0A2B, 0x0A0E, 0x0A1E, 0x0A2E, 0x0D02, 0x0D12, 0x0D22,
83 		0x0D06, 0x0D16, 0x0D26, 0x0D0A, 0x0D1A, 0x0D2A, 0x0D0B, 0x0D1B, 0x0D2B,
84 		0x0D0E, 0x0D1E, 0x0D2E, 0x0F31, 0x0F32, 0x0F33, 0x0157, 0x0155
85 	};
86 	const uint16_t gen8_ids[] = { 0x22B0, 0x22B1, 0x22B2, 0x22B3, 0x1602, 0x1606,
87 				      0x160A, 0x160B, 0x160D, 0x160E, 0x1612, 0x1616,
88 				      0x161A, 0x161B, 0x161D, 0x161E, 0x1622, 0x1626,
89 				      0x162A, 0x162B, 0x162D, 0x162E };
90 	const uint16_t gen9_ids[] = {
91 		0x1902, 0x1906, 0x190A, 0x190B, 0x190E, 0x1912, 0x1913, 0x1915, 0x1916, 0x1917,
92 		0x191A, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, 0x1926, 0x1927, 0x192A, 0x192B,
93 		0x192D, 0x1932, 0x193A, 0x193B, 0x193D, 0x0A84, 0x1A84, 0x1A85, 0x5A84, 0x5A85,
94 		0x3184, 0x3185, 0x5902, 0x5906, 0x590A, 0x5908, 0x590B, 0x590E, 0x5913, 0x5915,
95 		0x5917, 0x5912, 0x5916, 0x591A, 0x591B, 0x591D, 0x591E, 0x5921, 0x5923, 0x5926,
96 		0x5927, 0x593B, 0x591C, 0x87C0, 0x87CA, 0x3E90, 0x3E93, 0x3E99, 0x3E9C, 0x3E91,
97 		0x3E92, 0x3E96, 0x3E98, 0x3E9A, 0x3E9B, 0x3E94, 0x3EA9, 0x3EA5, 0x3EA6, 0x3EA7,
98 		0x3EA8, 0x3EA1, 0x3EA4, 0x3EA0, 0x3EA3, 0x3EA2, 0x9B21, 0x9BA0, 0x9BA2, 0x9BA4,
99 		0x9BA5, 0x9BA8, 0x9BAA, 0x9BAB, 0x9BAC, 0x9B41, 0x9BC0, 0x9BC2, 0x9BC4, 0x9BC5,
100 		0x9BC6, 0x9BC8, 0x9BCA, 0x9BCB, 0x9BCC, 0x9BE6, 0x9BF6
101 	};
102 	const uint16_t gen11_ids[] = { 0x8A50, 0x8A51, 0x8A52, 0x8A53, 0x8A54, 0x8A56, 0x8A57,
103 				       0x8A58, 0x8A59, 0x8A5A, 0x8A5B, 0x8A5C, 0x8A5D, 0x8A71,
104 				       0x4500, 0x4541, 0x4551, 0x4555, 0x4557, 0x4571, 0x4E51,
105 				       0x4E55, 0x4E57, 0x4E61, 0x4E71 };
106 	const uint16_t gen12_ids[] = {
107 		0x4c8a, 0x4c8b, 0x4c8c, 0x4c90, 0x4c9a, 0x4680, 0x4681, 0x4682, 0x4683, 0x4688,
108 		0x4689, 0x4690, 0x4691, 0x4692, 0x4693, 0x4698, 0x4699, 0x4626, 0x4628, 0x462a,
109 		0x46a0, 0x46a1, 0x46a2, 0x46a3, 0x46a6, 0x46a8, 0x46aa, 0x46b0, 0x46b1, 0x46b2,
110 		0x46b3, 0x46c0, 0x46c1, 0x46c2, 0x46c3, 0x9A40, 0x9A49, 0x9A59, 0x9A60, 0x9A68,
111 		0x9A70, 0x9A78, 0x9AC0, 0x9AC9, 0x9AD9, 0x9AF8, 0x4905, 0x4906, 0x4907, 0x4908
112 	};
113 	const uint16_t adlp_ids[] = { 0x46A0, 0x46A1, 0x46A2, 0x46A3, 0x46A6, 0x46A8, 0x46AA,
114 				      0x462A, 0x4626, 0x4628, 0x46B0, 0x46B1, 0x46B2, 0x46B3,
115 				      0x46C0, 0x46C1, 0x46C2, 0x46C3, 0x46D0, 0x46D1, 0x46D2 };
116 
117 	const uint16_t rplp_ids[] = { 0xA720, 0xA721, 0xA7A0, 0xA7A1, 0xA7A8, 0xA7A9 };
118 
119 	const uint16_t mtl_ids[] = { 0x7D40, 0x7D60, 0x7D45, 0x7D55, 0x7DD5 };
120 
121 	unsigned i;
122 	i915->graphics_version = 4;
123 	i915->is_xelpd = false;
124 	i915->is_mtl = false;
125 
126 	for (i = 0; i < ARRAY_SIZE(gen3_ids); i++)
127 		if (gen3_ids[i] == i915->device_id)
128 			i915->graphics_version = 3;
129 
130 	/* Gen 4 */
131 	for (i = 0; i < ARRAY_SIZE(gen4_ids); i++)
132 		if (gen4_ids[i] == i915->device_id)
133 			i915->graphics_version = 4;
134 
135 	/* Gen 5 */
136 	for (i = 0; i < ARRAY_SIZE(gen5_ids); i++)
137 		if (gen5_ids[i] == i915->device_id)
138 			i915->graphics_version = 5;
139 
140 	/* Gen 6 */
141 	for (i = 0; i < ARRAY_SIZE(gen6_ids); i++)
142 		if (gen6_ids[i] == i915->device_id)
143 			i915->graphics_version = 6;
144 
145 	/* Gen 7 */
146 	for (i = 0; i < ARRAY_SIZE(gen7_ids); i++)
147 		if (gen7_ids[i] == i915->device_id)
148 			i915->graphics_version = 7;
149 
150 	/* Gen 8 */
151 	for (i = 0; i < ARRAY_SIZE(gen8_ids); i++)
152 		if (gen8_ids[i] == i915->device_id)
153 			i915->graphics_version = 8;
154 
155 	/* Gen 9 */
156 	for (i = 0; i < ARRAY_SIZE(gen9_ids); i++)
157 		if (gen9_ids[i] == i915->device_id)
158 			i915->graphics_version = 9;
159 
160 	/* Gen 11 */
161 	for (i = 0; i < ARRAY_SIZE(gen11_ids); i++)
162 		if (gen11_ids[i] == i915->device_id)
163 			i915->graphics_version = 11;
164 
165 	/* Gen 12 */
166 	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++)
167 		if (gen12_ids[i] == i915->device_id)
168 			i915->graphics_version = 12;
169 
170 	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++)
171 		if (adlp_ids[i] == i915->device_id) {
172 			i915->is_xelpd = true;
173 			i915->graphics_version = 12;
174 		}
175 
176 	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++)
177 		if (rplp_ids[i] == i915->device_id) {
178 			i915->is_xelpd = true;
179 			i915->graphics_version = 12;
180 		}
181 
182 	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++)
183 		if (mtl_ids[i] == i915->device_id) {
184 			i915->graphics_version = 12;
185 			i915->is_mtl = true;
186 		}
187 }
188 
i915_get_modifier_order(struct i915_device * i915)189 static void i915_get_modifier_order(struct i915_device *i915)
190 {
191 	if (i915->is_mtl) {
192 		i915->modifier.order = xe_lpdp_modifier_order;
193 		i915->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
194 	} else if (i915->graphics_version == 12) {
195 		i915->modifier.order = gen12_modifier_order;
196 		i915->modifier.count = ARRAY_SIZE(gen12_modifier_order);
197 	} else if (i915->graphics_version == 11) {
198 		i915->modifier.order = gen11_modifier_order;
199 		i915->modifier.count = ARRAY_SIZE(gen11_modifier_order);
200 	} else {
201 		i915->modifier.order = gen_modifier_order;
202 		i915->modifier.count = ARRAY_SIZE(gen_modifier_order);
203 	}
204 }
205 
/* Returns |current_flags| with every bit set in |mask| cleared. */
static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	return current_flags & ~mask;
}
211 
/*
 * Registers every format/use-flag/modifier combination this backend
 * supports: linear first, then X-tiled, then 4-tiled (MTL) or Y-tiled
 * (all other platforms). Always returns 0.
 */
static int i915_add_combinations(struct driver *drv)
{
	struct i915_device *i915 = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    i915->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	/* Use flags that require a linear layout (CPU access / Renderscript);
	 * stripped from the tiled combinations below. */
	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = I915_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	/* Tiled combinations must not carry the linear-only use flags. */
	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;

	struct format_metadata metadata_x_tiled = { .tiling = I915_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	if (i915->is_mtl) {
		struct format_metadata metadata_4_tiled = { .tiling = I915_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };
/* Support tile4 NV12 and P010 for libva */
#ifdef I915_SCANOUT_4_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected | BO_USE_SCANOUT;
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = I915_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };
/* Support y-tiled NV12 and P010 for libva */
#ifdef I915_SCANOUT_Y_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER |
					    hw_protected |
					    (i915->graphics_version >= 11 ? BO_USE_SCANOUT : 0);
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		/* Y-tiled scanout isn't available on old platforms so we add
		 * |scanout_render_formats| without that USE flag.
		 */
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     render_not_linear);
	}
	return 0;
}
322 
/*
 * Rounds *stride and *aligned_height up to the alignment required by
 * |tiling|. Returns 0 on success, or -EINVAL when the resulting stride
 * exceeds 8192 bytes on gen3 and older hardware.
 */
static int i915_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
				 uint32_t *aligned_height)
{
	struct i915_device *i915 = bo->drv->priv;
	uint32_t horizontal_alignment;
	uint32_t vertical_alignment;

	switch (tiling) {
	default:
	case I915_TILING_NONE:
		/*
		 * The Intel GPU doesn't need any alignment in linear mode,
		 * but libva requires the allocation stride to be aligned to
		 * 16 bytes and height to 4 rows. Further, we round up the
		 * horizontal alignment so that row start on a cache line (64
		 * bytes).
		 */
#ifdef LINEAR_ALIGN_256
		/*
		 * If we want to import these buffers to amdgpu they need to
		 * match its LINEAR_ALIGNED requirement of 256 byte alignment.
		 */
		horizontal_alignment = 256;
#else
		horizontal_alignment = 64;
#endif
		/*
		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
		 * will cause the GPU to read out-of-bounds.
		 *
		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use
		 * virtgpu_cross_domain backend which passes that flag through.
		 */
		if (format == DRM_FORMAT_R8 && *aligned_height == 1) {
			vertical_alignment = 1;
		} else {
			vertical_alignment = 4;
		}

		break;

	case I915_TILING_X:
		horizontal_alignment = 512;
		vertical_alignment = 8;
		break;

	case I915_TILING_Y:
	case I915_TILING_4:
		if (i915->graphics_version == 3) {
			horizontal_alignment = 512;
			vertical_alignment = 8;
		} else {
			horizontal_alignment = 128;
			vertical_alignment = 32;
		}
		break;
	}

	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
	if (i915->graphics_version > 3) {
		*stride = ALIGN(*stride, horizontal_alignment);
	} else {
		/*
		 * Gen3 and older: double the (power-of-two) alignment until it
		 * covers the requested stride, then use it as the stride, so
		 * the stride ends up a power of two.
		 */
		while (*stride > horizontal_alignment)
			horizontal_alignment <<= 1;

		*stride = horizontal_alignment;
	}

	/* Strides above 8192 bytes are rejected on gen3 and older. */
	if (i915->graphics_version <= 3 && *stride > 8192)
		return -EINVAL;

	return 0;
}
401 
i915_clflush(void * start,size_t size)402 static void i915_clflush(void *start, size_t size)
403 {
404 	void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
405 	void *end = (void *)((uintptr_t)start + size);
406 
407 	__builtin_ia32_mfence();
408 	while (p < end) {
409 		__builtin_ia32_clflush(p);
410 		p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
411 	}
412 }
413 
i915_init(struct driver * drv)414 static int i915_init(struct driver *drv)
415 {
416 	int ret, val;
417 	struct i915_device *i915;
418 	drm_i915_getparam_t get_param = { 0 };
419 
420 	i915 = calloc(1, sizeof(*i915));
421 	if (!i915)
422 		return -ENOMEM;
423 
424 	get_param.param = I915_PARAM_CHIPSET_ID;
425 	get_param.value = &(i915->device_id);
426 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
427 	if (ret) {
428 		drv_loge("Failed to get I915_PARAM_CHIPSET_ID\n");
429 		free(i915);
430 		return -EINVAL;
431 	}
432 	/* must call before i915->graphics_version is used anywhere else */
433 	i915_info_from_device_id(i915);
434 
435 	i915_get_modifier_order(i915);
436 
437 	memset(&get_param, 0, sizeof(get_param));
438 	get_param.param = I915_PARAM_HAS_LLC;
439 	get_param.value = &i915->has_llc;
440 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
441 	if (ret) {
442 		drv_loge("Failed to get I915_PARAM_HAS_LLC\n");
443 		free(i915);
444 		return -EINVAL;
445 	}
446 
447 	memset(&get_param, 0, sizeof(get_param));
448 	get_param.param = I915_PARAM_NUM_FENCES_AVAIL;
449 	get_param.value = &i915->num_fences_avail;
450 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
451 	if (ret) {
452 		drv_loge("Failed to get I915_PARAM_NUM_FENCES_AVAIL\n");
453 		free(i915);
454 		return -EINVAL;
455 	}
456 
457 	memset(&get_param, 0, sizeof(get_param));
458 	get_param.param = I915_PARAM_MMAP_GTT_VERSION;
459 	get_param.value = &val;
460 
461 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
462 	if (ret) {
463 		drv_loge("Failed to get I915_PARAM_MMAP_GTT_VERSION\n");
464 		free(i915);
465 		return -EINVAL;
466 	}
467 	i915->has_mmap_offset = (val >= 4);
468 
469 	if (i915->graphics_version >= 12)
470 		i915->has_hw_protection = 1;
471 
472 	drv->priv = i915;
473 	return i915_add_combinations(drv);
474 }
475 
476 /*
477  * Returns true if the height of a buffer of the given format should be aligned
478  * to the largest coded unit (LCU) assuming that it will be used for video. This
479  * is based on gmmlib's GmmIsYUVFormatLCUAligned().
480  */
i915_format_needs_LCU_alignment(uint32_t format,size_t plane,const struct i915_device * i915)481 static bool i915_format_needs_LCU_alignment(uint32_t format, size_t plane,
482 					    const struct i915_device *i915)
483 {
484 	switch (format) {
485 	case DRM_FORMAT_NV12:
486 	case DRM_FORMAT_P010:
487 	case DRM_FORMAT_P016:
488 		return (i915->graphics_version == 11 || i915->graphics_version == 12) && plane == 1;
489 	}
490 	return false;
491 }
492 
/*
 * Fills in per-plane stride/size/offset metadata for a buffer laid out
 * plane-after-plane, then records the page-aligned total size.
 * Returns 0 on success or a negative value from i915_align_dimensions().
 */
static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
{
	uint32_t offset;
	size_t plane;
	int ret, pagesize;
	struct i915_device *i915 = bo->drv->priv;

	offset = 0;
	pagesize = getpagesize();

	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
		uint32_t stride = drv_stride_from_format(format, width, plane);
		uint32_t plane_height = drv_height_from_format(format, height, plane);

		/* Tiled planes are expected to start on a page boundary. */
		if (bo->meta.tiling != I915_TILING_NONE)
			assert(IS_ALIGNED(offset, pagesize));

		ret = i915_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
		if (ret)
			return ret;

		if (i915_format_needs_LCU_alignment(format, plane, i915)) {
			/*
			 * Align the height of the V plane for certain formats to the
			 * largest coded unit (assuming that this BO may be used for video)
			 * to be consistent with gmmlib.
			 */
			plane_height = ALIGN(plane_height, 64);
		}

		bo->meta.strides[plane] = stride;
		bo->meta.sizes[plane] = stride * plane_height;
		bo->meta.offsets[plane] = offset;
		offset += bo->meta.sizes[plane];
	}

	bo->meta.total_size = ALIGN(offset, pagesize);

	return 0;
}
533 
i915_num_planes_from_modifier(struct driver * drv,uint32_t format,uint64_t modifier)534 static size_t i915_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
535 {
536 	size_t num_planes = drv_num_planes_from_format(format);
537 	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
538 	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
539 		assert(num_planes == 1);
540 		return 2;
541 	}
542 
543 	return num_planes;
544 }
545 
/*
 * Picks a format modifier for the requested allocation (from the caller's
 * |modifiers| list or from the matching combination) and fills in bo->meta:
 * tiling, per-plane strides/sizes/offsets, plane count and total size.
 * Returns 0 on success or -EINVAL when no combination matches.
 */
static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				    uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	uint64_t modifier;
	struct i915_device *i915 = bo->drv->priv;
	bool huge_bo = (i915->graphics_version < 11) && (width > 4096);

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, i915->modifier.order, i915->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;
		modifier = combo->metadata.modifier;
	}

	/*
	 * i915 only supports linear/x-tiled above 4096 wide on Gen9/Gen10 GPU.
	 * VAAPI decode in NV12 Y tiled format so skip modifier change for NV12/P010 huge bo.
	 */
	if (huge_bo && format != DRM_FORMAT_NV12 && format != DRM_FORMAT_P010 &&
	    modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
		uint32_t i;
		/* Fall back to X-tiled if the caller allows it, else linear. */
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_X_TILED;
	}

	/*
	 * Skip I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled
	 * Pick y tiled modifier if it has been passed in, otherwise use linear
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	/* Prevent gen 8 and earlier from trying to use a tiling modifier */
	if (i915->graphics_version <= 8 && format == DRM_FORMAT_ARGB8888) {
		modifier = DRM_FORMAT_MOD_LINEAR;
	}

	/* NOTE(review): no default case — an unrecognized modifier would leave
	 * bo->meta.tiling unset; all reachable modifiers are covered today. */
	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = I915_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = I915_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now support only I915_TILING_Y as this works with all
	 * IPs(render/media/display)
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = I915_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
		bo->meta.tiling = I915_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		return drv_bo_from_format(bo, stride, 1, height, format);
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface.  Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {

		/*
		 * considering only 128 byte compression and one cache line of
		 * aux buffer(64B) contains compression status of 4-Y tiles.
		 * Which is 4 * (128B * 32L).
		 * line stride(bytes) is 4 * 128B
		 * and tile stride(lines) is 32L
		 */
		uint32_t stride = ALIGN(drv_stride_from_format(format, width, 0), 512);

		height = ALIGN(drv_height_from_format(format, height, 0), 32);

		/* XeLPD: round the stride up to the next power of two and use
		 * a taller (128-line) height alignment. */
		if (i915->is_xelpd && (stride > 1)) {
			stride = 1 << (32 - __builtin_clz(stride - 1));
			height = ALIGN(drv_height_from_format(format, height, 0), 128);
		}

		bo->meta.strides[0] = stride;
		/* size calculation and alignment are 64KB aligned
		 * size as per spec
		 */
		bo->meta.sizes[0] = ALIGN(stride * height, 65536);
		bo->meta.offsets[0] = 0;

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		bo->meta.strides[1] = bo->meta.strides[0] / 8;
		/* Aligned to page size */
		bo->meta.sizes[1] = ALIGN(bo->meta.sizes[0] / 256, getpagesize());
		bo->meta.offsets[1] = bo->meta.sizes[0];
		/* Total number of planes & sizes */
		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = bo->meta.sizes[0] + bo->meta.sizes[1];
	} else {
		return i915_bo_from_format(bo, width, height, format);
	}
	return 0;
}
715 
/*
 * Allocates the GEM object described by bo->meta. Protected buffers use
 * GEM_CREATE_EXT with the protected-content extension; everything else
 * uses plain GEM_CREATE. The single handle is shared by all planes, and
 * tiling is programmed only when the set/get-tiling ioctls are usable.
 * Returns 0 on success or -errno.
 */
static int i915_bo_create_from_metadata(struct bo *bo)
{
	int ret;
	size_t plane;
	uint32_t gem_handle;
	struct drm_i915_gem_set_tiling gem_set_tiling = { 0 };
	struct i915_device *i915 = bo->drv->priv;

	if (i915->has_hw_protection && (bo->meta.use_flags & BO_USE_PROTECTED)) {
		struct drm_i915_gem_create_ext_protected_content protected_content = {
			.base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
			.flags = 0,
		};

		struct drm_i915_gem_create_ext create_ext = {
			.size = bo->meta.total_size,
			.extensions = (uintptr_t)&protected_content,
		};

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_CREATE_EXT failed (size=%llu) (ret=%d) \n",
				 create_ext.size, ret);
			return -errno;
		}

		gem_handle = create_ext.handle;
	} else {
		struct drm_i915_gem_create gem_create = { 0 };
		gem_create.size = bo->meta.total_size;
		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
			return -errno;
		}

		gem_handle = gem_create.handle;
	}

	/* All planes live in the single GEM object at different offsets. */
	for (plane = 0; plane < bo->meta.num_planes; plane++)
		bo->handles[plane].u32 = gem_handle;

	/* Set/Get tiling ioctl not supported  based on fence availability
	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
	 */
	if (i915->num_fences_avail) {
		gem_set_tiling.handle = bo->handles[0].u32;
		gem_set_tiling.tiling_mode = bo->meta.tiling;
		gem_set_tiling.stride = bo->meta.strides[0];

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
		if (ret) {
			/* Undo the allocation before reporting failure. */
			struct drm_gem_close gem_close = { 0 };
			gem_close.handle = bo->handles[0].u32;
			drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);

			drv_loge("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
			return -errno;
		}
	}
	return 0;
}
778 
i915_close(struct driver * drv)779 static void i915_close(struct driver *drv)
780 {
781 	free(drv->priv);
782 	drv->priv = NULL;
783 }
784 
/*
 * Imports a prime buffer: derives the plane count from the modifier,
 * performs the generic prime import, then queries the kernel's tiling
 * mode when the get-tiling ioctl is usable. Returns 0 on success or a
 * negative value on failure (the bo is destroyed if get-tiling fails).
 */
static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
{
	int ret;
	struct drm_i915_gem_get_tiling gem_get_tiling = { 0 };
	struct i915_device *i915 = bo->drv->priv;

	bo->meta.num_planes =
	    i915_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);

	ret = drv_prime_bo_import(bo, data);
	if (ret)
		return ret;

	/* Set/Get tiling ioctl not supported  based on fence availability
	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
	 */
	if (i915->num_fences_avail) {
		/* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
		gem_get_tiling.handle = bo->handles[0].u32;

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
		if (ret) {
			drv_gem_bo_destroy(bo);
			drv_loge("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
			return ret;
		}
		bo->meta.tiling = gem_get_tiling.tiling_mode;
	}
	return 0;
}
815 
/*
 * Map a buffer into the CPU's address space.
 *
 * Strategy: for untiled buffers, try a direct CPU mapping first
 * (MMAP_OFFSET on newer kernels, legacy GEM_MMAP otherwise); if that is
 * unavailable or fails, fall back to a GTT mapping, which also handles
 * tiled buffers. CCS and Tile-4 buffers are never CPU-mappable here.
 *
 * Returns the mapped address, or MAP_FAILED on error. On success,
 * vma->length is set to the full buffer size.
 */
static void *i915_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
{
	int ret;
	void *addr = MAP_FAILED;
	struct i915_device *i915 = bo->drv->priv;

	/* Compressed (CCS) and Tile-4 layouts cannot be detiled through a
	 * CPU or GTT map; refuse outright. */
	if ((bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED))
		return MAP_FAILED;

	if (bo->meta.tiling == I915_TILING_NONE) {
		if (i915->has_mmap_offset) {
			/* Modern path: MMAP_OFFSET ioctl + mmap on the drm fd. */
			struct drm_i915_gem_mmap_offset gem_map = { 0 };
			gem_map.handle = bo->handles[0].u32;
			gem_map.flags = I915_MMAP_OFFSET_WB;

			/* Get the fake offset back */
			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_map);
			if (ret == 0)
				addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags),
					    MAP_SHARED, bo->drv->fd, gem_map.offset);
		} else {
			struct drm_i915_gem_mmap gem_map = { 0 };
			/* TODO(b/118799155): We don't seem to have a good way to
			 * detect the use cases for which WC mapping is really needed.
			 * The current heuristic seems overly coarse and may be slowing
			 * down some other use cases unnecessarily.
			 *
			 * For now, care must be taken not to use WC mappings for
			 * Renderscript and camera use cases, as they're
			 * performance-sensitive. */
			if ((bo->meta.use_flags & BO_USE_SCANOUT) &&
			    !(bo->meta.use_flags &
			      (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
				gem_map.flags = I915_MMAP_WC;

			gem_map.handle = bo->handles[0].u32;
			gem_map.offset = 0;
			gem_map.size = bo->meta.total_size;

			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
			/* DRM_IOCTL_I915_GEM_MMAP mmaps the underlying shm
			 * file and returns a user space address directly, ie,
			 * doesn't go through mmap. If we try that on a
			 * dma-buf that doesn't have a shm file, i915.ko
			 * returns ENXIO.  Fall through to
			 * DRM_IOCTL_I915_GEM_MMAP_GTT in that case, which
			 * will mmap on the drm fd instead. */
			if (ret == 0)
				addr = (void *)(uintptr_t)gem_map.addr_ptr;
		}
	}

	/* Fallback: GTT mapping. Reached for tiled buffers, and when the
	 * direct CPU mapping above failed (e.g. ENXIO on imported dma-bufs). */
	if (addr == MAP_FAILED) {
		struct drm_i915_gem_mmap_gtt gem_map = { 0 };

		gem_map.handle = bo->handles[0].u32;
		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
			return MAP_FAILED;
		}

		addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
			    bo->drv->fd, gem_map.offset);
	}

	if (addr == MAP_FAILED) {
		drv_loge("i915 GEM mmap failed\n");
		return addr;
	}

	vma->length = bo->meta.total_size;
	return addr;
}
892 
i915_bo_invalidate(struct bo * bo,struct mapping * mapping)893 static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
894 {
895 	int ret;
896 	struct drm_i915_gem_set_domain set_domain = { 0 };
897 
898 	set_domain.handle = bo->handles[0].u32;
899 	if (bo->meta.tiling == I915_TILING_NONE) {
900 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
901 		if (mapping->vma->map_flags & BO_MAP_WRITE)
902 			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
903 	} else {
904 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
905 		if (mapping->vma->map_flags & BO_MAP_WRITE)
906 			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
907 	}
908 
909 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
910 	if (ret) {
911 		drv_loge("DRM_IOCTL_I915_GEM_SET_DOMAIN with %d\n", ret);
912 		return ret;
913 	}
914 
915 	return 0;
916 }
917 
i915_bo_flush(struct bo * bo,struct mapping * mapping)918 static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
919 {
920 	struct i915_device *i915 = bo->drv->priv;
921 	if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
922 		i915_clflush(mapping->vma->addr, mapping->vma->length);
923 
924 	return 0;
925 }
926 
/* Backend vtable registered with the minigbm core for Intel i915 GPUs.
 * Entry points not implemented by this file use the generic drv_* helpers. */
const struct backend backend_i915 = {
	.name = "i915",
	.init = i915_init,
	.close = i915_close,
	.bo_compute_metadata = i915_bo_compute_metadata,
	.bo_create_from_metadata = i915_bo_create_from_metadata,
	.bo_destroy = drv_gem_bo_destroy,	/* generic GEM handle close */
	.bo_import = i915_bo_import,
	.bo_map = i915_bo_map,
	.bo_unmap = drv_bo_munmap,		/* plain munmap suffices */
	.bo_invalidate = i915_bo_invalidate,
	.bo_flush = i915_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
	.num_planes_from_modifier = i915_num_planes_from_modifier,
};
942 
943 #endif
944