/*
 * Copyright 2014 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifdef DRV_I915

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xf86drm.h>

#include "drv_helpers.h"
#include "drv_priv.h"
#include "external/i915_drm.h"
#include "util.h"

#define I915_CACHELINE_SIZE 64
#define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)

static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
						   DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
						   DRM_FORMAT_RGB565, DRM_FORMAT_XBGR2101010,
						   DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB2101010,
						   DRM_FORMAT_XRGB8888 };

static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };

static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
						 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };

static const uint64_t gen_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Y_TILED,
					       I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR };

static const uint64_t gen12_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
						 I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

static const uint64_t gen11_modifier_order[] = { I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

static const uint64_t xe_lpdp_modifier_order[] = { I915_FORMAT_MOD_4_TILED, I915_FORMAT_MOD_X_TILED,
						   DRM_FORMAT_MOD_LINEAR };
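
/*
 * Each *_modifier_order[] table above lists modifiers from most- to
 * least-preferred; drv_pick_modifier() walks the table selected by
 * i915_get_modifier_order() in that order in i915_bo_compute_metadata().
 */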

struct modifier_support_t {
	const uint64_t *order;
	uint32_t count;
};

struct i915_device {
	uint32_t graphics_version;
	int32_t has_llc;
	int32_t has_hw_protection;
	struct modifier_support_t modifier;
	int device_id;
	bool is_xelpd;
	/* TODO: clean up is_mtl to avoid adding a variable for every new platform. */
	bool is_mtl;
	int32_t num_fences_avail;
	bool has_mmap_offset;
};

static void i915_info_from_device_id(struct i915_device *i915)
{
	const uint16_t gen3_ids[] = { 0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE,
				      0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011 };
	const uint16_t gen4_ids[] = { 0x29A2, 0x2992, 0x2982, 0x2972, 0x2A02, 0x2A12, 0x2A42,
				      0x2E02, 0x2E12, 0x2E22, 0x2E32, 0x2E42, 0x2E92 };
	const uint16_t gen5_ids[] = { 0x0042, 0x0046 };
	const uint16_t gen6_ids[] = { 0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010A };
	const uint16_t gen7_ids[] = {
		0x0152, 0x0162, 0x0156, 0x0166, 0x015a, 0x016a, 0x0402, 0x0412, 0x0422,
		0x0406, 0x0416, 0x0426, 0x040A, 0x041A, 0x042A, 0x040B, 0x041B, 0x042B,
		0x040E, 0x041E, 0x042E, 0x0C02, 0x0C12, 0x0C22, 0x0C06, 0x0C16, 0x0C26,
		0x0C0A, 0x0C1A, 0x0C2A, 0x0C0B, 0x0C1B, 0x0C2B, 0x0C0E, 0x0C1E, 0x0C2E,
		0x0A02, 0x0A12, 0x0A22, 0x0A06, 0x0A16, 0x0A26, 0x0A0A, 0x0A1A, 0x0A2A,
		0x0A0B, 0x0A1B, 0x0A2B, 0x0A0E, 0x0A1E, 0x0A2E, 0x0D02, 0x0D12, 0x0D22,
		0x0D06, 0x0D16, 0x0D26, 0x0D0A, 0x0D1A, 0x0D2A, 0x0D0B, 0x0D1B, 0x0D2B,
		0x0D0E, 0x0D1E, 0x0D2E, 0x0F31, 0x0F32, 0x0F33, 0x0157, 0x0155
	};
	const uint16_t gen8_ids[] = { 0x22B0, 0x22B1, 0x22B2, 0x22B3, 0x1602, 0x1606,
				      0x160A, 0x160B, 0x160D, 0x160E, 0x1612, 0x1616,
				      0x161A, 0x161B, 0x161D, 0x161E, 0x1622, 0x1626,
				      0x162A, 0x162B, 0x162D, 0x162E };
	const uint16_t gen9_ids[] = {
		0x1902, 0x1906, 0x190A, 0x190B, 0x190E, 0x1912, 0x1913, 0x1915, 0x1916, 0x1917,
		0x191A, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, 0x1926, 0x1927, 0x192A, 0x192B,
		0x192D, 0x1932, 0x193A, 0x193B, 0x193D, 0x0A84, 0x1A84, 0x1A85, 0x5A84, 0x5A85,
		0x3184, 0x3185, 0x5902, 0x5906, 0x590A, 0x5908, 0x590B, 0x590E, 0x5913, 0x5915,
		0x5917, 0x5912, 0x5916, 0x591A, 0x591B, 0x591D, 0x591E, 0x5921, 0x5923, 0x5926,
		0x5927, 0x593B, 0x591C, 0x87C0, 0x87CA, 0x3E90, 0x3E93, 0x3E99, 0x3E9C, 0x3E91,
		0x3E92, 0x3E96, 0x3E98, 0x3E9A, 0x3E9B, 0x3E94, 0x3EA9, 0x3EA5, 0x3EA6, 0x3EA7,
		0x3EA8, 0x3EA1, 0x3EA4, 0x3EA0, 0x3EA3, 0x3EA2, 0x9B21, 0x9BA0, 0x9BA2, 0x9BA4,
		0x9BA5, 0x9BA8, 0x9BAA, 0x9BAB, 0x9BAC, 0x9B41, 0x9BC0, 0x9BC2, 0x9BC4, 0x9BC5,
		0x9BC6, 0x9BC8, 0x9BCA, 0x9BCB, 0x9BCC, 0x9BE6, 0x9BF6
	};
	const uint16_t gen11_ids[] = { 0x8A50, 0x8A51, 0x8A52, 0x8A53, 0x8A54, 0x8A56, 0x8A57,
				       0x8A58, 0x8A59, 0x8A5A, 0x8A5B, 0x8A5C, 0x8A5D, 0x8A71,
				       0x4500, 0x4541, 0x4551, 0x4555, 0x4557, 0x4571, 0x4E51,
				       0x4E55, 0x4E57, 0x4E61, 0x4E71 };
	const uint16_t gen12_ids[] = {
		0x4c8a, 0x4c8b, 0x4c8c, 0x4c90, 0x4c9a, 0x4680, 0x4681, 0x4682, 0x4683, 0x4688,
		0x4689, 0x4690, 0x4691, 0x4692, 0x4693, 0x4698, 0x4699, 0x4626, 0x4628, 0x462a,
		0x46a0, 0x46a1, 0x46a2, 0x46a3, 0x46a6, 0x46a8, 0x46aa, 0x46b0, 0x46b1, 0x46b2,
		0x46b3, 0x46c0, 0x46c1, 0x46c2, 0x46c3, 0x9A40, 0x9A49, 0x9A59, 0x9A60, 0x9A68,
		0x9A70, 0x9A78, 0x9AC0, 0x9AC9, 0x9AD9, 0x9AF8, 0x4905, 0x4906, 0x4907, 0x4908
	};
	const uint16_t adlp_ids[] = { 0x46A0, 0x46A1, 0x46A2, 0x46A3, 0x46A6, 0x46A8, 0x46AA,
				      0x462A, 0x4626, 0x4628, 0x46B0, 0x46B1, 0x46B2, 0x46B3,
				      0x46C0, 0x46C1, 0x46C2, 0x46C3, 0x46D0, 0x46D1, 0x46D2 };

	const uint16_t rplp_ids[] = { 0xA720, 0xA721, 0xA7A0, 0xA7A1, 0xA7A8, 0xA7A9 };

	const uint16_t mtl_ids[] = { 0x7D40, 0x7D60, 0x7D45, 0x7D55, 0x7DD5 };

	unsigned i;
	i915->graphics_version = 4;
	i915->is_xelpd = false;
	i915->is_mtl = false;

	/* Gen 3 */
	for (i = 0; i < ARRAY_SIZE(gen3_ids); i++)
		if (gen3_ids[i] == i915->device_id)
			i915->graphics_version = 3;

	/* Gen 4 */
	for (i = 0; i < ARRAY_SIZE(gen4_ids); i++)
		if (gen4_ids[i] == i915->device_id)
			i915->graphics_version = 4;

	/* Gen 5 */
	for (i = 0; i < ARRAY_SIZE(gen5_ids); i++)
		if (gen5_ids[i] == i915->device_id)
			i915->graphics_version = 5;

	/* Gen 6 */
	for (i = 0; i < ARRAY_SIZE(gen6_ids); i++)
		if (gen6_ids[i] == i915->device_id)
			i915->graphics_version = 6;

	/* Gen 7 */
	for (i = 0; i < ARRAY_SIZE(gen7_ids); i++)
		if (gen7_ids[i] == i915->device_id)
			i915->graphics_version = 7;

	/* Gen 8 */
	for (i = 0; i < ARRAY_SIZE(gen8_ids); i++)
		if (gen8_ids[i] == i915->device_id)
			i915->graphics_version = 8;

	/* Gen 9 */
	for (i = 0; i < ARRAY_SIZE(gen9_ids); i++)
		if (gen9_ids[i] == i915->device_id)
			i915->graphics_version = 9;

	/* Gen 11 */
	for (i = 0; i < ARRAY_SIZE(gen11_ids); i++)
		if (gen11_ids[i] == i915->device_id)
			i915->graphics_version = 11;

	/* Gen 12 */
	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++)
		if (gen12_ids[i] == i915->device_id)
			i915->graphics_version = 12;

	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++)
		if (adlp_ids[i] == i915->device_id) {
			i915->is_xelpd = true;
			i915->graphics_version = 12;
		}

	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++)
		if (rplp_ids[i] == i915->device_id) {
			i915->is_xelpd = true;
			i915->graphics_version = 12;
		}

	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++)
		if (mtl_ids[i] == i915->device_id) {
			i915->graphics_version = 12;
			i915->is_mtl = true;
		}
}

static void i915_get_modifier_order(struct i915_device *i915)
{
	if (i915->is_mtl) {
		i915->modifier.order = xe_lpdp_modifier_order;
		i915->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
	} else if (i915->graphics_version == 12) {
		i915->modifier.order = gen12_modifier_order;
		i915->modifier.count = ARRAY_SIZE(gen12_modifier_order);
	} else if (i915->graphics_version == 11) {
		i915->modifier.order = gen11_modifier_order;
		i915->modifier.count = ARRAY_SIZE(gen11_modifier_order);
	} else {
		i915->modifier.order = gen_modifier_order;
		i915->modifier.count = ARRAY_SIZE(gen_modifier_order);
	}
}

static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	uint64_t value = current_flags & ~mask;
	return value;
}

static int i915_add_combinations(struct driver *drv)
{
	struct i915_device *i915 = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW-protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    i915->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = I915_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* The IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * The R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB, for JPEG snapshots
	 * from the camera, and for input/output of the hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;

	struct format_metadata metadata_x_tiled = { .tiling = I915_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	if (i915->is_mtl) {
		struct format_metadata metadata_4_tiled = { .tiling = I915_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };
/* Support tile-4 NV12 and P010 for libva. */
#ifdef I915_SCANOUT_4_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected | BO_USE_SCANOUT;
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = I915_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };
/* Support Y-tiled NV12 and P010 for libva. */
#ifdef I915_SCANOUT_Y_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER |
					    hw_protected |
					    (i915->graphics_version >= 11 ? BO_USE_SCANOUT : 0);
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		/* Y-tiled scanout isn't available on old platforms, so we add
		 * |scanout_render_formats| without that use flag.
		 */
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     render_not_linear);
	}
	return 0;
}

static int i915_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
				 uint32_t *aligned_height)
{
	struct i915_device *i915 = bo->drv->priv;
	uint32_t horizontal_alignment;
	uint32_t vertical_alignment;

	switch (tiling) {
	default:
	case I915_TILING_NONE:
		/*
		 * The Intel GPU doesn't need any alignment in linear mode,
		 * but libva requires the allocation stride to be aligned to
		 * 16 bytes and the height to 4 rows. Further, we round up the
		 * horizontal alignment so that each row starts on a cache
		 * line (64 bytes).
		 */
#ifdef LINEAR_ALIGN_256
		/*
		 * If we want to import these buffers into amdgpu, they need
		 * to match its LINEAR_ALIGNED requirement of 256-byte stride
		 * alignment.
		 */
		horizontal_alignment = 256;
#else
		horizontal_alignment = 64;
#endif
		/*
		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
		 * will cause the GPU to read out-of-bounds.
		 *
		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to the
		 * virtgpu_cross_domain backend, which passes that flag through.
		 */
		if (format == DRM_FORMAT_R8 && *aligned_height == 1) {
			vertical_alignment = 1;
		} else {
			vertical_alignment = 4;
		}

		break;

	case I915_TILING_X:
		horizontal_alignment = 512;
		vertical_alignment = 8;
		break;

	case I915_TILING_Y:
	case I915_TILING_4:
		if (i915->graphics_version == 3) {
			horizontal_alignment = 512;
			vertical_alignment = 8;
		} else {
			horizontal_alignment = 128;
			vertical_alignment = 32;
		}
		break;
	}

	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
	if (i915->graphics_version > 3) {
		*stride = ALIGN(*stride, horizontal_alignment);
	} else {
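		/*
		 * Pre-Gen4 strides are rounded up to a power of two by the
		 * doubling loop below: e.g. a requested 3000-byte stride
		 * becomes 4096.
		 */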
		while (*stride > horizontal_alignment)
			horizontal_alignment <<= 1;

		*stride = horizontal_alignment;
	}

	if (i915->graphics_version <= 3 && *stride > 8192)
		return -EINVAL;

	return 0;
}

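/*
 * Flush the CPU caches over a mapped range. The start pointer is rounded down
 * to a cache-line boundary so partially covered lines are flushed too, and the
 * mfence orders prior writes before the flushes. (x86-only: this relies on the
 * clflush/mfence builtins being available.)
 */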
static void i915_clflush(void *start, size_t size)
{
	void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
	void *end = (void *)((uintptr_t)start + size);

	__builtin_ia32_mfence();
	while (p < end) {
		__builtin_ia32_clflush(p);
		p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
	}
}

static int i915_init(struct driver *drv)
{
	int ret, val;
	struct i915_device *i915;
	drm_i915_getparam_t get_param = { 0 };

	i915 = calloc(1, sizeof(*i915));
	if (!i915)
		return -ENOMEM;

	get_param.param = I915_PARAM_CHIPSET_ID;
	get_param.value = &(i915->device_id);
	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
	if (ret) {
		drv_loge("Failed to get I915_PARAM_CHIPSET_ID\n");
		free(i915);
		return -EINVAL;
	}
	/* Must be called before i915->graphics_version is used anywhere else. */
	i915_info_from_device_id(i915);

	i915_get_modifier_order(i915);

	memset(&get_param, 0, sizeof(get_param));
	get_param.param = I915_PARAM_HAS_LLC;
	get_param.value = &i915->has_llc;
	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
	if (ret) {
		drv_loge("Failed to get I915_PARAM_HAS_LLC\n");
		free(i915);
		return -EINVAL;
	}

	memset(&get_param, 0, sizeof(get_param));
	get_param.param = I915_PARAM_NUM_FENCES_AVAIL;
	get_param.value = &i915->num_fences_avail;
	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
	if (ret) {
		drv_loge("Failed to get I915_PARAM_NUM_FENCES_AVAIL\n");
		free(i915);
		return -EINVAL;
	}

	memset(&get_param, 0, sizeof(get_param));
	get_param.param = I915_PARAM_MMAP_GTT_VERSION;
	get_param.value = &val;

	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
	if (ret) {
		drv_loge("Failed to get I915_PARAM_MMAP_GTT_VERSION\n");
		free(i915);
		return -EINVAL;
	}
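	/*
	 * A GTT mmap version of 4 or newer implies that
	 * DRM_IOCTL_I915_GEM_MMAP_OFFSET is available; i915_bo_map() below
	 * prefers it over the legacy mmap paths.
	 */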
	i915->has_mmap_offset = (val >= 4);

	if (i915->graphics_version >= 12)
		i915->has_hw_protection = 1;

	drv->priv = i915;
	return i915_add_combinations(drv);
}

/*
 * Returns true if the height of a buffer of the given format should be aligned
 * to the largest coded unit (LCU) assuming that it will be used for video. This
 * is based on gmmlib's GmmIsYUVFormatLCUAligned().
 */
static bool i915_format_needs_LCU_alignment(uint32_t format, size_t plane,
					    const struct i915_device *i915)
{
	switch (format) {
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_P010:
	case DRM_FORMAT_P016:
		return (i915->graphics_version == 11 || i915->graphics_version == 12) && plane == 1;
	}
	return false;
}

static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
{
	uint32_t offset;
	size_t plane;
	int ret, pagesize;
	struct i915_device *i915 = bo->drv->priv;

	offset = 0;
	pagesize = getpagesize();

	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
		uint32_t stride = drv_stride_from_format(format, width, plane);
		uint32_t plane_height = drv_height_from_format(format, height, plane);

		if (bo->meta.tiling != I915_TILING_NONE)
			assert(IS_ALIGNED(offset, pagesize));

		ret = i915_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
		if (ret)
			return ret;

		if (i915_format_needs_LCU_alignment(format, plane, i915)) {
			/*
			 * Align the height of the V plane for certain formats to the
			 * largest coded unit (assuming that this BO may be used for video)
			 * to be consistent with gmmlib.
			 */
			plane_height = ALIGN(plane_height, 64);
		}

		bo->meta.strides[plane] = stride;
		bo->meta.sizes[plane] = stride * plane_height;
		bo->meta.offsets[plane] = offset;
		offset += bo->meta.sizes[plane];
	}

	bo->meta.total_size = ALIGN(offset, pagesize);

	return 0;
}

static size_t i915_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
{
	size_t num_planes = drv_num_planes_from_format(format);
	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		assert(num_planes == 1);
		return 2;
	}

	return num_planes;
}

static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				    uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	uint64_t modifier;
	struct i915_device *i915 = bo->drv->priv;
	bool huge_bo = (i915->graphics_version < 11) && (width > 4096);

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, i915->modifier.order, i915->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;
		modifier = combo->metadata.modifier;
	}
	/*
	 * i915 only supports linear/X-tiled above 4096 pixels wide on Gen9/Gen10 GPUs.
	 * VAAPI decodes NV12 into a Y-tiled surface, so skip the modifier change for
	 * NV12/P010 huge BOs.
	 */
	if (huge_bo && format != DRM_FORMAT_NV12 && format != DRM_FORMAT_P010 &&
	    modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_X_TILED;
	}
	/*
	 * Skip the I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled.
	 * Pick the Y-tiled modifier if it has been passed in; otherwise use linear.
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	/* Prevent Gen8 and earlier from trying to use a tiling modifier. */
	if (i915->graphics_version <= 8 && format == DRM_FORMAT_ARGB8888) {
		modifier = DRM_FORMAT_MOD_LINEAR;
	}

	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = I915_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = I915_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now, support only I915_TILING_Y, as this works with all
	 * IPs (render/media/display).
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = I915_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
		bo->meta.tiling = I915_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
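		/*
		 * For example, width=1920 keeps a 1920-byte Y stride, and the
		 * Cr/Cb planes then get a 960-byte stride, which satisfies the
		 * ALIGN(Y_stride / 2, 16) expectation noted above.
		 */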
		return drv_bo_from_format(bo, stride, 1, height, format);
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y-tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface. Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
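		/*
		 * Worked example (assuming XRGB8888 at 1920x1080): the stride
		 * is 7680 bytes, so the main surface is 60x34 Y tiles
		 * (60 * 34 * 4096 = 8355840 bytes), and the CCS computed below
		 * is DIV_ROUND_UP(60, 32) x DIV_ROUND_UP(34, 16) = 2x3 tiles
		 * (24576 bytes).
		 */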
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to Y tiles, the offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		/*
		 * Only 128-byte compression is considered here. One cache line
		 * (64B) of the aux buffer tracks the compression status of
		 * four Y tiles, i.e. 4 * (128B wide x 32 lines) of the main
		 * surface. That makes the aux line stride main_stride / 8 and
		 * the aux size main_size / 256, computed below.
		 */
		uint32_t stride = ALIGN(drv_stride_from_format(format, width, 0), 512);

		height = ALIGN(drv_height_from_format(format, height, 0), 32);

		if (i915->is_xelpd && (stride > 1)) {
			/* On XE_LPD, round the stride up to the next power of
			 * two and align the height to 128 lines. */
			stride = 1 << (32 - __builtin_clz(stride - 1));
			height = ALIGN(drv_height_from_format(format, height, 0), 128);
		}

		bo->meta.strides[0] = stride;
		/* Per spec, the main surface size is aligned to 64KB. */
		bo->meta.sizes[0] = ALIGN(stride * height, 65536);
		bo->meta.offsets[0] = 0;

		/*
		 * The aux buffer is linear and page-aligned. It is placed
		 * after the main surface, at a 64KB-aligned offset, with a
		 * stride derived from the main surface stride.
		 */
		bo->meta.strides[1] = bo->meta.strides[0] / 8;
		/* Aligned to the page size. */
		bo->meta.sizes[1] = ALIGN(bo->meta.sizes[0] / 256, getpagesize());
		bo->meta.offsets[1] = bo->meta.sizes[0];
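		/*
		 * E.g., with a 7680-byte stride and 1088 aligned lines, the
		 * main plane is ALIGN(7680 * 1088, 64KB) = 8388608 bytes and
		 * the aux plane is ALIGN(8388608 / 256, page) = 32768 bytes at
		 * offset 8388608 (assuming 4KB pages).
		 */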
		/* Total number of planes and sizes. */
		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = bo->meta.sizes[0] + bo->meta.sizes[1];
	} else {
		return i915_bo_from_format(bo, width, height, format);
	}
	return 0;
}

static int i915_bo_create_from_metadata(struct bo *bo)
{
	int ret;
	size_t plane;
	uint32_t gem_handle;
	struct drm_i915_gem_set_tiling gem_set_tiling = { 0 };
	struct i915_device *i915 = bo->drv->priv;

	if (i915->has_hw_protection && (bo->meta.use_flags & BO_USE_PROTECTED)) {
		struct drm_i915_gem_create_ext_protected_content protected_content = {
			.base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
			.flags = 0,
		};

		struct drm_i915_gem_create_ext create_ext = {
			.size = bo->meta.total_size,
			.extensions = (uintptr_t)&protected_content,
		};

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_CREATE_EXT failed (size=%llu) (ret=%d)\n",
				 create_ext.size, ret);
			return -errno;
		}

		gem_handle = create_ext.handle;
	} else {
		struct drm_i915_gem_create gem_create = { 0 };
		gem_create.size = bo->meta.total_size;
		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
			return -errno;
		}

		gem_handle = gem_create.handle;
	}

	for (plane = 0; plane < bo->meta.num_planes; plane++)
		bo->handles[plane].u32 = gem_handle;
	/*
	 * The set/get tiling ioctls are only supported when hardware fences
	 * are available; see https://patchwork.freedesktop.org/patch/325343/.
	 */
	if (i915->num_fences_avail) {
		gem_set_tiling.handle = bo->handles[0].u32;
		gem_set_tiling.tiling_mode = bo->meta.tiling;
		gem_set_tiling.stride = bo->meta.strides[0];

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
		if (ret) {
			struct drm_gem_close gem_close = { 0 };
			gem_close.handle = bo->handles[0].u32;
			drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);

			drv_loge("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
			return -errno;
		}
	}
	return 0;
}

static void i915_close(struct driver *drv)
{
	free(drv->priv);
	drv->priv = NULL;
}

static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
{
	int ret;
	struct drm_i915_gem_get_tiling gem_get_tiling = { 0 };
	struct i915_device *i915 = bo->drv->priv;

	bo->meta.num_planes =
	    i915_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);

	ret = drv_prime_bo_import(bo, data);
	if (ret)
		return ret;
	/*
	 * The set/get tiling ioctls are only supported when hardware fences
	 * are available; see https://patchwork.freedesktop.org/patch/325343/.
	 */
	if (i915->num_fences_avail) {
		/* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
		gem_get_tiling.handle = bo->handles[0].u32;

		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
		if (ret) {
			drv_gem_bo_destroy(bo);
			drv_loge("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
			return ret;
		}
		bo->meta.tiling = gem_get_tiling.tiling_mode;
	}
	return 0;
}

static void *i915_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
{
	int ret;
	void *addr = MAP_FAILED;
	struct i915_device *i915 = bo->drv->priv;

	if ((bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED))
		return MAP_FAILED;

	if (bo->meta.tiling == I915_TILING_NONE) {
		if (i915->has_mmap_offset) {
			struct drm_i915_gem_mmap_offset gem_map = { 0 };
			gem_map.handle = bo->handles[0].u32;
			gem_map.flags = I915_MMAP_OFFSET_WB;

			/* Get the fake offset back. */
			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_map);
			if (ret == 0)
				addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags),
					    MAP_SHARED, bo->drv->fd, gem_map.offset);
		} else {
			struct drm_i915_gem_mmap gem_map = { 0 };
			/* TODO(b/118799155): We don't seem to have a good way to
			 * detect the use cases for which WC mapping is really needed.
			 * The current heuristic seems overly coarse and may be slowing
			 * down some other use cases unnecessarily.
			 *
			 * For now, care must be taken not to use WC mappings for
			 * Renderscript and camera use cases, as they're
			 * performance-sensitive. */
			if ((bo->meta.use_flags & BO_USE_SCANOUT) &&
			    !(bo->meta.use_flags &
			      (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
				gem_map.flags = I915_MMAP_WC;

			gem_map.handle = bo->handles[0].u32;
			gem_map.offset = 0;
			gem_map.size = bo->meta.total_size;

			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
			/* DRM_IOCTL_I915_GEM_MMAP mmaps the underlying shm
			 * file and returns a user space address directly,
			 * i.e., it doesn't go through mmap. If we try that on
			 * a dma-buf that doesn't have a shm file, i915.ko
			 * returns ENXIO. Fall through to
			 * DRM_IOCTL_I915_GEM_MMAP_GTT in that case, which
			 * will mmap on the drm fd instead. */
			if (ret == 0)
				addr = (void *)(uintptr_t)gem_map.addr_ptr;
		}
	}

	if (addr == MAP_FAILED) {
		struct drm_i915_gem_mmap_gtt gem_map = { 0 };

		gem_map.handle = bo->handles[0].u32;
		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
			return MAP_FAILED;
		}

		addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
			    bo->drv->fd, gem_map.offset);
	}

	if (addr == MAP_FAILED) {
		drv_loge("i915 GEM mmap failed\n");
		return addr;
	}

	vma->length = bo->meta.total_size;
	return addr;
}

static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
{
	int ret;
	struct drm_i915_gem_set_domain set_domain = { 0 };

	set_domain.handle = bo->handles[0].u32;
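	/*
	 * Linear buffers are CPU-mapped directly, while tiled buffers go
	 * through the GTT (see i915_bo_map() above), so pick the matching
	 * domain before CPU access.
	 */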
	if (bo->meta.tiling == I915_TILING_NONE) {
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		if (mapping->vma->map_flags & BO_MAP_WRITE)
			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	} else {
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		if (mapping->vma->map_flags & BO_MAP_WRITE)
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	}

	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
	if (ret) {
		drv_loge("DRM_IOCTL_I915_GEM_SET_DOMAIN failed with %d\n", ret);
		return ret;
	}

	return 0;
}

static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
{
	struct i915_device *i915 = bo->drv->priv;
	if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
		i915_clflush(mapping->vma->addr, mapping->vma->length);

	return 0;
}

const struct backend backend_i915 = {
	.name = "i915",
	.init = i915_init,
	.close = i915_close,
	.bo_compute_metadata = i915_bo_compute_metadata,
	.bo_create_from_metadata = i915_bo_create_from_metadata,
	.bo_destroy = drv_gem_bo_destroy,
	.bo_import = i915_bo_import,
	.bo_map = i915_bo_map,
	.bo_unmap = drv_bo_munmap,
	.bo_invalidate = i915_bo_invalidate,
	.bo_flush = i915_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
	.num_planes_from_modifier = i915_num_planes_from_modifier,
};

#endif