/*
 * Copyright 2024 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifdef DRV_XE

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xf86drm.h>

#include "drv_helpers.h"
#include "drv_priv.h"

#include "external/xe_drm.h"
#include "util.h"
#include "intel_defines.h"

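/*
 * Preferred format modifier order for the device, consumed by
 * drv_pick_modifier() when the client passes in a modifier list.
 */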
struct modifier_support_t {
	const uint64_t *order;
	uint32_t count;
};

struct xe_device {
	uint32_t graphics_version;
	int device_id;
	bool is_xelpd;
	/* TODO: clean up is_mtl_or_newer to avoid adding a variable for every new platform */
	bool is_mtl_or_newer;
	int32_t has_hw_protection;
	bool has_local_mem;
	int revision;

	uint64_t gtt_size;
	/* Memory vm_bind alignment and buffer size requirement */
	unsigned mem_alignment;
	struct modifier_support_t modifier;
	int32_t num_fences_avail;
	bool has_mmap_offset;
};

static void xe_info_from_device_id(struct xe_device *xe)
{
	unsigned i;
	xe->graphics_version = 0;
	xe->is_xelpd = false;
	xe->is_mtl_or_newer = false;

	/* Search the ID lists from most to least specific. */
	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++) {
		if (adlp_ids[i] == xe->device_id) {
			xe->is_xelpd = true;
			xe->graphics_version = 12;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++) {
		if (rplp_ids[i] == xe->device_id) {
			xe->is_xelpd = true;
			xe->graphics_version = 12;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++) {
		if (mtl_ids[i] == xe->device_id) {
			xe->graphics_version = 12;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(lnl_ids); i++) {
		if (lnl_ids[i] == xe->device_id) {
			xe->graphics_version = 20;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	for (i = 0; i < ARRAY_SIZE(ptl_ids); i++) {
		if (ptl_ids[i] == xe->device_id) {
			xe->graphics_version = 30;
			xe->is_mtl_or_newer = true;
			return;
		}
	}

	/* Gen 12 */
	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++) {
		if (gen12_ids[i] == xe->device_id) {
			xe->graphics_version = 12;
			return;
		}
	}
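
	/* No match: graphics_version stays 0 and xe_init() will reject the device. */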
}

static void xe_get_modifier_order(struct xe_device *xe)
{
	if (xe->is_mtl_or_newer) {
		xe->modifier.order = xe_lpdp_modifier_order;
		xe->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
	} else if (xe->is_xelpd) {
		xe->modifier.order = gen12_modifier_order;
		xe->modifier.count = ARRAY_SIZE(gen12_modifier_order);
	} else {
		xe->modifier.order = xe_lpdp_modifier_order;
		xe->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
	}
}

static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	return current_flags & ~mask;
}

/* TODO(ryanneph): share implementation with i915_add_combinations */
static int xe_add_combinations(struct driver *drv)
{
	struct xe_device *xe = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	/* HW protected buffers also need to be scanned out. */
	const uint64_t hw_protected =
		xe->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = XE_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* P010 linear can be used for scanout too. */
	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata_linear, BO_USE_SCANOUT);

	/*
	 * Android also frequently requests YV12 formats for some camera implementations
	 * (including the external provider implementation).
	 */
	drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &metadata_linear,
			       BO_USE_CAMERA_WRITE);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

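	/*
	 * Tiled buffers are not meant for linear or direct SW access, so those
	 * usages are dropped from the tiled combinations below.
	 */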
	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;
	struct format_metadata metadata_x_tiled = { .tiling = XE_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	const uint64_t nv12_usage =
	    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
	const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected |
				    (xe->graphics_version >= 11 ? BO_USE_SCANOUT : 0);

	if (xe->is_mtl_or_newer) {
		struct format_metadata metadata_4_tiled = { .tiling = XE_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };

		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     scanout_and_render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = XE_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };

		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     scanout_and_render_not_linear);
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
	}
	return 0;
}

static int xe_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
			       uint32_t *aligned_height)
{
	uint32_t horizontal_alignment = 0;
	uint32_t vertical_alignment = 0;

	switch (tiling) {
	default:
	case XE_TILING_NONE:
		/*
		 * The Intel GPU doesn't need any alignment in linear mode,
		 * but libva requires the allocation stride to be aligned to
		 * 16 bytes and height to 4 rows. Further, we round up the
		 * horizontal alignment so that rows start on a cache line
		 * (64 bytes).
		 */
#ifdef LINEAR_ALIGN_256
		/*
		 * If we want to import these buffers to amdgpu, they need to
		 * match its LINEAR_ALIGNED requirement of 256-byte alignment.
		 */
		horizontal_alignment = 256;
#else
		horizontal_alignment = 64;
#endif
		/*
		 * For hardware video encoding buffers, we want to align to the size of a
		 * macroblock, because otherwise we will end up encoding uninitialized data.
		 * This can result in substantial quality degradations, especially on lower
		 * resolution videos, because this uninitialized data may be high entropy.
		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
		 * will cause the GPU to read out-of-bounds.
		 *
		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use
		 * virtgpu_cross_domain backend which passes that flag through.
		 */
		if (bo->meta.use_flags & BO_USE_HW_VIDEO_ENCODER)
			vertical_alignment = 8;
		else if (format == DRM_FORMAT_R8 && *aligned_height == 1)
			vertical_alignment = 1;
		else
			vertical_alignment = 4;

		break;
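
	/*
	 * The alignments below follow the tile geometry: an X tile is
	 * 512 bytes wide by 8 rows, and Y/Tile4 tiles are 128 bytes wide
	 * by 32 rows.
	 */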
	case XE_TILING_X:
		horizontal_alignment = 512;
		vertical_alignment = 8;
		break;

	case XE_TILING_Y:
	case XE_TILING_4:
		horizontal_alignment = 128;
		vertical_alignment = 32;
		break;
	}

	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
	*stride = ALIGN(*stride, horizontal_alignment);

	return 0;
}
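
/*
 * Xe device queries are two-step: a DRM_XE_DEVICE_QUERY ioctl with data == 0
 * reports the required buffer size, and a second call fills the buffer.
 */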
static bool xe_query_config(struct driver *drv, struct xe_device *xe)
{
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_CONFIG,
	};

	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return false;

	struct drm_xe_query_config *config = calloc(1, query.size);
	if (!config)
		return false;

	query.data = (uintptr_t)config;
	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		goto data_query_failed;

	xe->has_local_mem =
	    config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
	xe->revision = (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xFFFF;
	xe->gtt_size = 1ull << config->info[DRM_XE_QUERY_CONFIG_VA_BITS];
	xe->mem_alignment = config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT];

	free(config);
	return true;

data_query_failed:
	free(config);
	return false;
}

static bool xe_device_probe(struct driver *drv, struct xe_device *xe)
{
	/* Retrieve the device info by querying the KMD via ioctl. */
	struct drm_xe_device_query query = {
		.extensions = 0,
		.query = DRM_XE_DEVICE_QUERY_CONFIG,
		.size = 0,
		.data = 0,
	};

	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return false;

	struct drm_xe_query_config *config = calloc(1, query.size);
	if (!config)
		return false;

	query.data = (uintptr_t)config;
	if (drmIoctl(drv->fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) {
		free(config);
		return false;
	}

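	/*
	 * DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID packs the PCI device ID into
	 * the low 16 bits and the device revision into the bits above them.
	 */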
	xe->device_id = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xFFFF;
	xe->revision = (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xFFFF;

	free(config);
	return true;
}

static int xe_init(struct driver *drv)
{
	struct xe_device *xe;

	xe = calloc(1, sizeof(*xe));
	if (!xe)
		return -ENOMEM;

	if (!xe_device_probe(drv, xe)) {
		drv_loge("Failed to query device id using DRM_IOCTL_XE_DEVICE_QUERY");
		free(xe);
		return -EINVAL;
	}

	xe_query_config(drv, xe);

	/* Must be called before xe->graphics_version is used anywhere else. */
	xe_info_from_device_id(xe);

	xe_get_modifier_order(xe);

	/* Xe does not yet support protected content. */
	if (xe->graphics_version >= 12) {
		xe->has_hw_protection = 0;
	} else {
		drv_loge("Xe driver is not supported on your platform: 0x%x\n", xe->device_id);
		free(xe);
		return -EINVAL;
	}

	drv->priv = xe;

	return xe_add_combinations(drv);
}

/*
 * Returns true if the height of a buffer of the given format should be aligned
 * to the largest coded unit (LCU) assuming that it will be used for video. This
 * is based on gmmlib's GmmIsYUVFormatLCUAligned().
 */
static bool xe_format_needs_LCU_alignment(uint32_t format, size_t plane,
					  const struct xe_device *xe)
{
	switch (format) {
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_P010:
	case DRM_FORMAT_P016:
		return (xe->graphics_version >= 12) && plane == 1;
	}
	return false;
}

static int xe_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
{
	uint32_t offset;
	size_t plane;
	int ret, pagesize;
	struct xe_device *xe = bo->drv->priv;

	offset = 0;
	pagesize = getpagesize();

	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
		uint32_t stride = drv_stride_from_format(format, width, plane);
		uint32_t plane_height = drv_height_from_format(format, height, plane);

		if (bo->meta.tiling != XE_TILING_NONE)
			assert(IS_ALIGNED(offset, pagesize));

		ret = xe_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
		if (ret)
			return ret;

		if (xe_format_needs_LCU_alignment(format, plane, xe)) {
			/*
			 * Align the height of the V plane for certain formats to the
			 * largest coded unit (assuming that this BO may be used for video)
			 * to be consistent with gmmlib.
			 */
			plane_height = ALIGN(plane_height, 64);
		}

		bo->meta.strides[plane] = stride;
		bo->meta.sizes[plane] = stride * plane_height;
		bo->meta.offsets[plane] = offset;
		offset += bo->meta.sizes[plane];
	}

	bo->meta.total_size = ALIGN(offset, pagesize);

	return 0;
}
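
/*
 * CCS modifiers carry an extra auxiliary plane (the color control surface),
 * so a single-plane format is exposed as two planes.
 */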
static size_t xe_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
{
	size_t num_planes = drv_num_planes_from_format(format);

	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		assert(num_planes == 1);
		return 2;
	}

	return num_planes;
}

static int xe_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				  uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	int ret = 0;
	uint64_t modifier;
	struct xe_device *xe = bo->drv->priv;

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, xe->modifier.order, xe->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;

		if ((xe->is_mtl_or_newer) &&
		    (use_flags == (BO_USE_SCANOUT | BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER))) {
			modifier = I915_FORMAT_MOD_4_TILED;
		} else {
			modifier = combo->metadata.modifier;
		}
	}

	/*
	 * Skip the I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled.
	 * Pick the Y-tiled modifier if it was passed in, otherwise fall back to linear.
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = XE_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = XE_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/*
	 * For now, support only I915_TILING_Y, as this works with all
	 * IPs (render/media/display).
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = XE_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
		bo->meta.tiling = XE_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		ret = drv_bo_from_format(bo, stride, 1, height, format);
		bo->meta.total_size = ALIGN(bo->meta.total_size, getpagesize());
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface. Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. Stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
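		/*
		 * Illustrative example: a 1920x1080 ARGB8888 surface has a
		 * 7680-byte stride, so width_in_tiles = 7680 / 128 = 60 and
		 * height_in_tiles = DIV_ROUND_UP(1080, 32) = 34; the CCS then
		 * needs DIV_ROUND_UP(60, 32) * DIV_ROUND_UP(34, 16) = 2 * 3
		 * tiles of 4096 bytes each.
		 */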
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = xe_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		/*
		 * Considering only 128-byte compression: one cache line of the
		 * aux buffer (64B) holds the compression status of 4 Y tiles,
		 * i.e. 4 * (128B * 32L), so the aux line stride is 4 * 128B
		 * and its tile stride is 32 lines.
		 */
		uint32_t stride = ALIGN(drv_stride_from_format(format, width, 0), 512);

		height = ALIGN(drv_height_from_format(format, height, 0), 32);

		if (xe->is_xelpd && (stride > 1)) {
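			/* Round the stride up to the next power of two. */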
			stride = 1 << (32 - __builtin_clz(stride - 1));
			height = ALIGN(drv_height_from_format(format, height, 0), 128);
		}

		bo->meta.strides[0] = stride;
		/* The main surface size is aligned to 64KB, as per spec. */
		bo->meta.sizes[0] = ALIGN(stride * height, 65536);
		bo->meta.offsets[0] = 0;

		/*
		 * The aux buffer is linear and page aligned. It is placed
		 * after the other planes and aligned to the main buffer stride.
		 */
		bo->meta.strides[1] = bo->meta.strides[0] / 8;
		/* Aligned to page size. */
		bo->meta.sizes[1] = ALIGN(bo->meta.sizes[0] / 256, getpagesize());
		bo->meta.offsets[1] = bo->meta.sizes[0];
		/* Total number of planes and sizes. */
		bo->meta.num_planes = xe_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = bo->meta.sizes[0] + bo->meta.sizes[1];
	} else {
		ret = xe_bo_from_format(bo, width, height, format);
	}

	return ret;
}

static int xe_bo_create_from_metadata(struct bo *bo)
{
	int ret;

	uint32_t flags = 0;
	uint32_t cpu_caching;
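
	/*
	 * Scanout buffers are CPU-mapped write-combined since the display
	 * engine is not coherent with the CPU caches; other buffers can use
	 * writeback caching.
	 */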
	if (bo->meta.use_flags & BO_USE_SCANOUT) {
		flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
		cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
	} else {
		cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
	}

	struct drm_xe_gem_create gem_create = {
		.vm_id = 0, /* ensure exportable to PRIME fd */
		.size = bo->meta.total_size,
		.flags = flags,
		.cpu_caching = cpu_caching,
	};

	/* FIXME: assume only an iGPU with SYSMEM is supported for now. */
	gem_create.placement |= BITFIELD_BIT(DRM_XE_MEM_REGION_CLASS_SYSMEM);

	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
	if (ret)
		return -errno;

	bo->handle.u32 = gem_create.handle;

	return 0;
}

static void xe_close(struct driver *drv)
{
	free(drv->priv);
	drv->priv = NULL;
}

static int xe_bo_import(struct bo *bo, struct drv_import_fd_data *data)
{
	bo->meta.num_planes =
	    xe_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);

	return drv_prime_bo_import(bo, data);
}

static void *xe_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
{
	int ret;
	void *addr = MAP_FAILED;

	struct drm_xe_gem_mmap_offset gem_map = {
		.handle = bo->handle.u32,
	};

	/* Get the fake mmap offset back. */
	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &gem_map);
	if (ret == 0) {
		addr = mmap(NULL, bo->meta.total_size, PROT_READ | PROT_WRITE, MAP_SHARED,
			    bo->drv->fd, gem_map.offset);
	}

	if (addr == MAP_FAILED) {
		drv_loge("xe GEM mmap failed\n");
		return addr;
	}

	vma->length = bo->meta.total_size;

	return addr;
}

#define XE_CACHELINE_SIZE 64
#define XE_CACHELINE_MASK (XE_CACHELINE_SIZE - 1)
static void xe_clflush(void *start, size_t size)
{
	/* Copy of i915_clflush(). */
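	/* Round the start address down to a cache-line boundary. */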
	void *p = (void *)(((uintptr_t)start) & ~XE_CACHELINE_MASK);
	void *end = (void *)((uintptr_t)start + size);

	__builtin_ia32_mfence();
	while (p < end) {
#if defined(__CLFLUSHOPT__)
		__builtin_ia32_clflushopt(p);
#else
		__builtin_ia32_clflush(p);
#endif
		p = (void *)((uintptr_t)p + XE_CACHELINE_SIZE);
	}
	__builtin_ia32_mfence();
}

static int xe_bo_flush(struct bo *bo, struct mapping *mapping)
{
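	/*
	 * Linear buffers may be CPU-mapped with writeback caching (see
	 * xe_bo_create_from_metadata()), which may not be coherent with the
	 * GPU on all platforms, so flush the CPU cache explicitly.
	 */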
	if (bo->meta.tiling == XE_TILING_NONE)
		xe_clflush(mapping->vma->addr, mapping->vma->length);

	return 0;
}

const struct backend backend_xe = {
	.name = "xe",
	.init = xe_init,
	.close = xe_close,
	.bo_compute_metadata = xe_bo_compute_metadata,
	.bo_create_from_metadata = xe_bo_create_from_metadata,
	.bo_map = xe_bo_map,
	.bo_destroy = drv_gem_bo_destroy,
	.bo_unmap = drv_bo_munmap,
	.num_planes_from_modifier = xe_num_planes_from_modifier,
	.bo_import = xe_bo_import,
	.bo_flush = xe_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
};

#endif