/*
 * Copyright 2024 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifdef DRV_XE

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xf86drm.h>

#include "drv_helpers.h"
#include "drv_priv.h"

#include "external/xe_drm.h"
#include "util.h"
#include "intel_defines.h"

struct modifier_support_t {
	const uint64_t *order;
	uint32_t count;
};
struct xe_device {
	uint32_t graphics_version;
	int device_id;
	bool is_xelpd;
	/* TODO: clean up is_mtl_or_newer to avoid adding a variable for every new platform */
	bool is_mtl_or_newer;
	int32_t has_hw_protection;
	bool has_local_mem;
	int revision;

	uint64_t gtt_size;
	/* Memory VM bind alignment and buffer size requirement. */
	unsigned mem_alignment;
	struct modifier_support_t modifier;
	int32_t num_fences_avail;
	bool has_mmap_offset;
};

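/*
 * Derive the graphics version and platform flags from the PCI device ID by
 * scanning the per-platform ID tables (adlp_ids, rplp_ids, mtl_ids, lnl_ids,
 * ptl_ids, gen12_ids), most specific first.
 */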
static void xe_info_from_device_id(struct xe_device *xe)
{
	unsigned i;
	xe->graphics_version = 0;
	xe->is_xelpd = false;
	xe->is_mtl_or_newer = false;

	/* search lists from most --> least specific */
	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++) {
		if (adlp_ids[i] == xe->device_id) {
			xe->is_xelpd = true;
			xe->graphics_version = 12;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++) {
		if (rplp_ids[i] == xe->device_id) {
			xe->is_xelpd = true;
			xe->graphics_version = 12;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++) {
		if (mtl_ids[i] == xe->device_id) {
			xe->graphics_version = 12;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(lnl_ids); i++) {
		if (lnl_ids[i] == xe->device_id) {
			xe->graphics_version = 20;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(ptl_ids); i++) {
		if (ptl_ids[i] == xe->device_id) {
			xe->graphics_version = 30;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	/* Gen 12 */
	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++) {
		if (gen12_ids[i] == xe->device_id) {
			xe->graphics_version = 12;
			return;
		}
	}
}

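/* Select the modifier preference order used when picking a modifier at allocation time. */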
static void xe_get_modifier_order(struct xe_device *xe)
{
	if (xe->is_mtl_or_newer) {
		xe->modifier.order = xe_lpdp_modifier_order;
		xe->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
	} else if (xe->is_xelpd) {
		xe->modifier.order = gen12_modifier_order;
		xe->modifier.count = ARRAY_SIZE(gen12_modifier_order);
	} else {
		xe->modifier.order = xe_lpdp_modifier_order;
		xe->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
	}
}

static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	uint64_t value = current_flags & ~mask;
	return value;
}

/* TODO(ryanneph): share implementation with i915_add_combinations */
static int xe_add_combinations(struct driver *drv)
{
	struct xe_device *xe = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    xe->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = XE_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* P010 linear can be used for scanout too. */
	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata_linear, BO_USE_SCANOUT);

	/*
	 * Android also frequently requests YV12 formats for some camera implementations
	 * (including the external provider implementation).
	 */
	drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &metadata_linear,
			       BO_USE_CAMERA_WRITE);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;
	struct format_metadata metadata_x_tiled = { .tiling = XE_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	const uint64_t nv12_usage =
	    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
	const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected |
				    (xe->graphics_version >= 11 ? BO_USE_SCANOUT : 0);

	if (xe->is_mtl_or_newer) {
		struct format_metadata metadata_4_tiled = { .tiling = XE_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };

		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     scanout_and_render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = XE_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };

		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     scanout_and_render_not_linear);
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
	}
	return 0;
}

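/*
 * Compute the stride and height alignment required for the given tiling mode
 * and buffer usage.
 */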
static int xe_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
			       uint32_t *aligned_height)
{
	uint32_t horizontal_alignment = 0;
	uint32_t vertical_alignment = 0;

	switch (tiling) {
	default:
	case XE_TILING_NONE:
		/*
		 * The Intel GPU doesn't need any alignment in linear mode,
		 * but libva requires the allocation stride to be aligned to
		 * 16 bytes and the height to 4 rows. Further, we round up the
		 * horizontal alignment so that rows start on a cache line (64
		 * bytes).
		 */
#ifdef LINEAR_ALIGN_256
		/*
		 * If we want to import these buffers to amdgpu, they need to
		 * match its LINEAR_ALIGNED requirement of 256-byte alignment.
		 */
		horizontal_alignment = 256;
#else
		horizontal_alignment = 64;
#endif
		/*
		 * For hardware video encoding buffers, we want to align to the size of a
		 * macroblock, because otherwise we will end up encoding uninitialized data.
		 * This can result in substantial quality degradation, especially on lower
		 * resolution videos, because this uninitialized data may be high entropy.
		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
		 * will cause the GPU to read out-of-bounds.
		 *
		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use the
		 * virtgpu_cross_domain backend, which passes that flag through.
		 */
		if (bo->meta.use_flags & BO_USE_HW_VIDEO_ENCODER)
			vertical_alignment = 8;
		else if (format == DRM_FORMAT_R8 && *aligned_height == 1)
			vertical_alignment = 1;
		else
			vertical_alignment = 4;

		break;
	case XE_TILING_X:
		horizontal_alignment = 512;
		vertical_alignment = 8;
		break;

	case XE_TILING_Y:
	case XE_TILING_4:
		horizontal_alignment = 128;
		vertical_alignment = 32;
		break;
	}

	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
	*stride = ALIGN(*stride, horizontal_alignment);

	return 0;
}

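/*
 * Query DRM_XE_DEVICE_QUERY_CONFIG and cache the VRAM presence, stepping
 * revision, GTT size and minimum allocation alignment on the xe_device.
 */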
static bool xe_query_config(struct driver *drv, struct xe_device *xe)
{
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_CONFIG,
	};
	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return false;

	struct drm_xe_query_config *config = calloc(1, query.size);
	if (!config)
		return false;

	query.data = (uintptr_t)config;
	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		goto data_query_failed;

	if (config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM)
		xe->has_local_mem = true;
	else
		xe->has_local_mem = false;

	xe->revision = (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xFFFF;
	xe->gtt_size = 1ull << config->info[DRM_XE_QUERY_CONFIG_VA_BITS];
	xe->mem_alignment = config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT];

	free(config);
	return true;

data_query_failed:
	free(config);
	return false;
}

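/* Query the KMD for the PCI device ID and revision of the opened device. */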
static bool xe_device_probe(struct driver *drv, struct xe_device *xe)
{
	/* Retrieve the device info by querying the KMD through an IOCTL. */
	struct drm_xe_device_query query = {
		.extensions = 0,
		.query = DRM_XE_DEVICE_QUERY_CONFIG,
		.size = 0,
		.data = 0,
	};

	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return false;

	struct drm_xe_query_config *config = calloc(1, query.size);
	if (!config)
		return false;

	query.data = (uintptr_t)config;
	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) {
		free(config);
		return false;
	}

	/* The device ID lives in the low 16 bits, the revision in bits 31:16. */
	xe->device_id = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xFFFF;
	xe->revision = (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xFFFF;

	free(config);
	return true;
}

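/*
 * Backend initialization: probe the device, query its configuration, pick the
 * modifier preference order and register the supported format/usage combinations.
 */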
static int xe_init(struct driver *drv)
{
	struct xe_device *xe;

	xe = calloc(1, sizeof(*xe));
	if (!xe)
		return -ENOMEM;

	if (!xe_device_probe(drv, xe)) {
		drv_loge("Failed to query device id using DRM_IOCTL_XE_DEVICE_QUERY");
		free(xe);
		return -EINVAL;
	}

	xe_query_config(drv, xe);

	/* Must be called before xe->graphics_version is used anywhere else. */
	xe_info_from_device_id(xe);

	xe_get_modifier_order(xe);

	/* Xe does not yet support protected content. */
	if (xe->graphics_version >= 12) {
		xe->has_hw_protection = 0;
	} else {
		drv_loge("Xe driver is not supported on your platform: 0x%x\n", xe->device_id);
		free(xe);
		return -EINVAL;
	}

	drv->priv = xe;

	return xe_add_combinations(drv);
}

/*
 * Returns true if the height of a buffer of the given format should be aligned
 * to the largest coded unit (LCU) assuming that it will be used for video. This
 * is based on gmmlib's GmmIsYUVFormatLCUAligned().
 */
static bool xe_format_needs_LCU_alignment(uint32_t format, size_t plane,
					  const struct xe_device *xe)
{
	switch (format) {
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_P010:
	case DRM_FORMAT_P016:
		return (xe->graphics_version >= 12) && plane == 1;
	}
	return false;
}

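/*
 * Lay out the planes of a buffer: align each plane's stride and height for the
 * chosen tiling, pack the planes contiguously, and pad the total size to a page.
 */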
static int xe_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
{
	uint32_t offset;
	size_t plane;
	int ret, pagesize;
	struct xe_device *xe = bo->drv->priv;

	offset = 0;
	pagesize = getpagesize();

	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
		uint32_t stride = drv_stride_from_format(format, width, plane);
		uint32_t plane_height = drv_height_from_format(format, height, plane);

		if (bo->meta.tiling != XE_TILING_NONE)
			assert(IS_ALIGNED(offset, pagesize));

		ret = xe_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
		if (ret)
			return ret;

		if (xe_format_needs_LCU_alignment(format, plane, xe)) {
			/*
			 * Align the height of the V plane for certain formats to the
			 * largest coded unit (assuming that this BO may be used for video)
			 * to be consistent with gmmlib.
			 */
			plane_height = ALIGN(plane_height, 64);
		}

		bo->meta.strides[plane] = stride;
		bo->meta.sizes[plane] = stride * plane_height;
		bo->meta.offsets[plane] = offset;
		offset += bo->meta.sizes[plane];
	}

	bo->meta.total_size = ALIGN(offset, pagesize);

	return 0;
}

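/* CCS modifiers carry an extra aux plane in addition to the single main plane. */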
static size_t xe_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
{
	size_t num_planes = drv_num_planes_from_format(format);

	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		assert(num_planes == 1);
		return 2;
	}

	return num_planes;
}

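/*
 * Choose a format modifier (from the caller-supplied list if given, otherwise
 * from the driver's combination table) and fill in the per-plane strides,
 * sizes and offsets for the buffer.
 */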
static int xe_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				  uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	int ret = 0;
	uint64_t modifier;
	struct xe_device *xe = bo->drv->priv;

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, xe->modifier.order, xe->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;

		if ((xe->is_mtl_or_newer) &&
		    (use_flags == (BO_USE_SCANOUT | BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER))) {
			modifier = I915_FORMAT_MOD_4_TILED;
		} else {
			modifier = combo->metadata.modifier;
		}
	}

	/*
	 * Skip the I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled.
	 * Pick the Y-tiled modifier if it has been passed in, otherwise fall back to linear.
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = XE_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = XE_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now support only I915_TILING_Y as this works with all
	 * IPs (render/media/display).
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = XE_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
		bo->meta.tiling = XE_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		ret = drv_bo_from_format(bo, stride, 1, height, format);
		bo->meta.total_size = ALIGN(bo->meta.total_size, getpagesize());
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface. Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = xe_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		/*
		 * Considering only 128-byte compression: one cache line (64B) of the
		 * aux buffer contains the compression status of 4 Y tiles, i.e.
		 * 4 * (128B * 32 lines). The aux line stride is therefore
		 * 4 * 128 bytes and the tile stride is 32 lines.
		 */
		uint32_t stride = ALIGN(drv_stride_from_format(format, width, 0), 512);

		height = ALIGN(drv_height_from_format(format, height, 0), 32);

		if (xe->is_xelpd && (stride > 1)) {
			stride = 1 << (32 - __builtin_clz(stride - 1));
			height = ALIGN(drv_height_from_format(format, height, 0), 128);
		}

		bo->meta.strides[0] = stride;
		/* Size calculation and alignment are 64KB aligned as per spec. */
		bo->meta.sizes[0] = ALIGN(stride * height, 65536);
		bo->meta.offsets[0] = 0;

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		bo->meta.strides[1] = bo->meta.strides[0] / 8;
		/* Aligned to page size */
		bo->meta.sizes[1] = ALIGN(bo->meta.sizes[0] / 256, getpagesize());
		bo->meta.offsets[1] = bo->meta.sizes[0];
		/* Total number of planes & sizes */
		bo->meta.num_planes = xe_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = bo->meta.sizes[0] + bo->meta.sizes[1];
	} else {
		ret = xe_bo_from_format(bo, width, height, format);
	}

	return ret;
}

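/* Allocate the GEM object, sized from the metadata computed above. */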
static int xe_bo_create_from_metadata(struct bo *bo)
{
	int ret;

	uint32_t flags = 0;
	uint32_t cpu_caching;
	if (bo->meta.use_flags & BO_USE_SCANOUT) {
		flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
		cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
	} else {
		cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
	}

	struct drm_xe_gem_create gem_create = {
		.vm_id = 0, /* ensure exportable to PRIME fd */
		.size = bo->meta.total_size,
		.flags = flags,
		.cpu_caching = cpu_caching,
	};

	/* FIXME: for now, assume only iGPUs with SYSMEM are supported. */
	gem_create.placement |= BITFIELD_BIT(DRM_XE_MEM_REGION_CLASS_SYSMEM);

	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
	if (ret)
		return -errno;

	bo->handle.u32 = gem_create.handle;

	return 0;
}

static void xe_close(struct driver *drv)
{
	free(drv->priv);
	drv->priv = NULL;
}

static int xe_bo_import(struct bo *bo, struct drv_import_fd_data *data)
{
	int ret;

	bo->meta.num_planes =
	    xe_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);

	ret = drv_prime_bo_import(bo, data);
	if (ret)
		return ret;

	return 0;
}

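/* CPU-map a buffer: fetch its mmap offset from the KMD, then mmap() the DRM fd at that offset. */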
static void *xe_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
{
	int ret;
	void *addr = MAP_FAILED;

	struct drm_xe_gem_mmap_offset gem_map = {
		.handle = bo->handle.u32,
	};

	/* Get the fake offset back */
	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &gem_map);
	if (ret == 0) {
		addr = mmap(0, bo->meta.total_size, PROT_READ | PROT_WRITE, MAP_SHARED,
			    bo->drv->fd, gem_map.offset);
	}

	if (addr == MAP_FAILED) {
		drv_loge("xe GEM mmap failed\n");
		return addr;
	}

	vma->length = bo->meta.total_size;

	return addr;
}

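/*
 * Explicit CPU cache flush over a mapped range; used below to flush linear
 * (possibly WB-cached) mappings.
 */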
#define XE_CACHELINE_SIZE 64
#define XE_CACHELINE_MASK (XE_CACHELINE_SIZE - 1)
static void xe_clflush(void *start, size_t size)
{
	/* copy of i915_clflush() */
	void *p = (void *)(((uintptr_t)start) & ~XE_CACHELINE_MASK);
	void *end = (void *)((uintptr_t)start + size);

	__builtin_ia32_mfence();
	while (p < end) {
#if defined(__CLFLUSHOPT__)
		__builtin_ia32_clflushopt(p);
#else
		__builtin_ia32_clflush(p);
#endif
		p = (void *)((uintptr_t)p + XE_CACHELINE_SIZE);
	}
	__builtin_ia32_mfence();
}

static int xe_bo_flush(struct bo *bo, struct mapping *mapping)
{
	if (bo->meta.tiling == XE_TILING_NONE) {
		xe_clflush(mapping->vma->addr, mapping->vma->length);
	}

	return 0;
}

const struct backend backend_xe = {
	.name = "xe",
	.init = xe_init,
	.close = xe_close,
	.bo_compute_metadata = xe_bo_compute_metadata,
	.bo_create_from_metadata = xe_bo_create_from_metadata,
	.bo_map = xe_bo_map,
	.bo_destroy = drv_gem_bo_destroy,
	.bo_unmap = drv_bo_munmap,
	.num_planes_from_modifier = xe_num_planes_from_modifier,
	.bo_import = xe_bo_import,
	.bo_flush = xe_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
};

#endif