• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 #ifdef DRV_AMDGPU
7 #include <amdgpu.h>
8 #include <amdgpu_drm.h>
9 #include <errno.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <xf86drm.h>
15 
16 #include "dri.h"
17 #include "drv_priv.h"
18 #include "helpers.h"
19 #include "util.h"
20 
21 // clang-format off
22 #define DRI_PATH STRINGIZE(DRI_DRIVER_DIR/radeonsi_dri.so)
23 // clang-format on
24 
25 #define TILE_TYPE_LINEAR 0
26 /* DRI backend decides tiling in this case. */
27 #define TILE_TYPE_DRI 1
28 
29 /* Height alignment for Encoder/Decoder buffers */
30 #define CHROME_HEIGHT_ALIGN 16
31 
32 struct amdgpu_priv {
33 	struct dri_driver dri;
34 	int drm_version;
35 
36 	/* sdma */
37 	struct drm_amdgpu_info_device dev_info;
38 	uint32_t sdma_ctx;
39 	uint32_t sdma_cmdbuf_bo;
40 	uint64_t sdma_cmdbuf_addr;
41 	uint64_t sdma_cmdbuf_size;
42 	uint32_t *sdma_cmdbuf_map;
43 };
44 
/*
 * Per-mapping state attached to vma->priv when amdgpu_map_bo() maps a linear
 * BO through a temporary GTT staging buffer instead of mapping it directly.
 */
struct amdgpu_linear_vma_priv {
	/* GEM handle of the staging buffer that is actually mmap'ed. */
	uint32_t handle;
	/* Flags the mapping was requested with; checked for BO_MAP_WRITE on unmap. */
	uint32_t map_flags;
};
49 
50 const static uint32_t render_target_formats[] = {
51 	DRM_FORMAT_ABGR8888,	  DRM_FORMAT_ARGB8888,	  DRM_FORMAT_RGB565,
52 	DRM_FORMAT_XBGR8888,	  DRM_FORMAT_XRGB8888,	  DRM_FORMAT_ABGR2101010,
53 	DRM_FORMAT_ARGB2101010,	  DRM_FORMAT_XBGR2101010, DRM_FORMAT_XRGB2101010,
54 	DRM_FORMAT_ABGR16161616F,
55 };
56 
57 const static uint32_t texture_source_formats[] = {
58 	DRM_FORMAT_GR88,	   DRM_FORMAT_R8,     DRM_FORMAT_NV21, DRM_FORMAT_NV12,
59 	DRM_FORMAT_YVU420_ANDROID, DRM_FORMAT_YVU420, DRM_FORMAT_P010
60 };
61 
query_dev_info(int fd,struct drm_amdgpu_info_device * dev_info)62 static int query_dev_info(int fd, struct drm_amdgpu_info_device *dev_info)
63 {
64 	struct drm_amdgpu_info info_args = { 0 };
65 
66 	info_args.return_pointer = (uintptr_t)dev_info;
67 	info_args.return_size = sizeof(*dev_info);
68 	info_args.query = AMDGPU_INFO_DEV_INFO;
69 
70 	return drmCommandWrite(fd, DRM_AMDGPU_INFO, &info_args, sizeof(info_args));
71 }
72 
sdma_init(struct amdgpu_priv * priv,int fd)73 static int sdma_init(struct amdgpu_priv *priv, int fd)
74 {
75 	union drm_amdgpu_ctx ctx_args = { { 0 } };
76 	union drm_amdgpu_gem_create gem_create = { { 0 } };
77 	struct drm_amdgpu_gem_va va_args = { 0 };
78 	union drm_amdgpu_gem_mmap gem_map = { { 0 } };
79 	struct drm_gem_close gem_close = { 0 };
80 	int ret;
81 
82 	/* Ensure we can make a submission without BO lists. */
83 	if (priv->drm_version < 27)
84 		return 0;
85 
86 	/* Anything outside this range needs adjustments to the SDMA copy commands */
87 	if (priv->dev_info.family < AMDGPU_FAMILY_CI || priv->dev_info.family > AMDGPU_FAMILY_NV)
88 		return 0;
89 
90 	ctx_args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
91 
92 	ret = drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
93 	if (ret < 0)
94 		return ret;
95 
96 	priv->sdma_ctx = ctx_args.out.alloc.ctx_id;
97 
98 	priv->sdma_cmdbuf_size = ALIGN(4096, priv->dev_info.virtual_address_alignment);
99 	gem_create.in.bo_size = priv->sdma_cmdbuf_size;
100 	gem_create.in.alignment = 4096;
101 	gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;
102 
103 	ret = drmCommandWriteRead(fd, DRM_AMDGPU_GEM_CREATE, &gem_create, sizeof(gem_create));
104 	if (ret < 0)
105 		goto fail_ctx;
106 
107 	priv->sdma_cmdbuf_bo = gem_create.out.handle;
108 
109 	priv->sdma_cmdbuf_addr =
110 	    ALIGN(priv->dev_info.virtual_address_offset, priv->dev_info.virtual_address_alignment);
111 
112 	/* Map the buffer into the GPU address space so we can use it from the GPU */
113 	va_args.handle = priv->sdma_cmdbuf_bo;
114 	va_args.operation = AMDGPU_VA_OP_MAP;
115 	va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
116 	va_args.va_address = priv->sdma_cmdbuf_addr;
117 	va_args.offset_in_bo = 0;
118 	va_args.map_size = priv->sdma_cmdbuf_size;
119 
120 	ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
121 	if (ret)
122 		goto fail_bo;
123 
124 	gem_map.in.handle = priv->sdma_cmdbuf_bo;
125 	ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_map);
126 	if (ret)
127 		goto fail_va;
128 
129 	priv->sdma_cmdbuf_map = mmap(0, priv->sdma_cmdbuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
130 				     fd, gem_map.out.addr_ptr);
131 	if (priv->sdma_cmdbuf_map == MAP_FAILED) {
132 		priv->sdma_cmdbuf_map = NULL;
133 		ret = -ENOMEM;
134 		goto fail_va;
135 	}
136 
137 	return 0;
138 fail_va:
139 	va_args.operation = AMDGPU_VA_OP_UNMAP;
140 	va_args.flags = 0;
141 	drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
142 fail_bo:
143 	gem_close.handle = priv->sdma_cmdbuf_bo;
144 	drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
145 fail_ctx:
146 	memset(&ctx_args, 0, sizeof(ctx_args));
147 	ctx_args.in.op = AMDGPU_CTX_OP_FREE_CTX;
148 	ctx_args.in.ctx_id = priv->sdma_ctx;
149 	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
150 	return ret;
151 }
152 
sdma_finish(struct amdgpu_priv * priv,int fd)153 static void sdma_finish(struct amdgpu_priv *priv, int fd)
154 {
155 	union drm_amdgpu_ctx ctx_args = { { 0 } };
156 	struct drm_amdgpu_gem_va va_args = { 0 };
157 	struct drm_gem_close gem_close = { 0 };
158 
159 	if (!priv->sdma_cmdbuf_map)
160 		return;
161 
162 	va_args.handle = priv->sdma_cmdbuf_bo;
163 	va_args.operation = AMDGPU_VA_OP_UNMAP;
164 	va_args.flags = 0;
165 	va_args.va_address = priv->sdma_cmdbuf_addr;
166 	va_args.offset_in_bo = 0;
167 	va_args.map_size = priv->sdma_cmdbuf_size;
168 	drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
169 
170 	gem_close.handle = priv->sdma_cmdbuf_bo;
171 	drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
172 
173 	ctx_args.in.op = AMDGPU_CTX_OP_FREE_CTX;
174 	ctx_args.in.ctx_id = priv->sdma_ctx;
175 	drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &ctx_args, sizeof(ctx_args));
176 }
177 
sdma_copy(struct amdgpu_priv * priv,int fd,uint32_t src_handle,uint32_t dst_handle,uint64_t size)178 static int sdma_copy(struct amdgpu_priv *priv, int fd, uint32_t src_handle, uint32_t dst_handle,
179 		     uint64_t size)
180 {
181 	const uint64_t max_size_per_cmd = 0x3fff00;
182 	const uint32_t cmd_size = 7 * sizeof(uint32_t); /* 7 dwords, see loop below. */
183 	const uint64_t max_commands = priv->sdma_cmdbuf_size / cmd_size;
184 	uint64_t src_addr = priv->sdma_cmdbuf_addr + priv->sdma_cmdbuf_size;
185 	uint64_t dst_addr = src_addr + size;
186 	struct drm_amdgpu_gem_va va_args = { 0 };
187 	unsigned cmd = 0;
188 	uint64_t remaining_size = size;
189 	uint64_t cur_src_addr = src_addr;
190 	uint64_t cur_dst_addr = dst_addr;
191 	struct drm_amdgpu_cs_chunk_ib ib = { 0 };
192 	struct drm_amdgpu_cs_chunk chunks[2] = { { 0 } };
193 	uint64_t chunk_ptrs[2];
194 	union drm_amdgpu_cs cs = { { 0 } };
195 	struct drm_amdgpu_bo_list_in bo_list = { 0 };
196 	struct drm_amdgpu_bo_list_entry bo_list_entries[3] = { { 0 } };
197 	union drm_amdgpu_wait_cs wait_cs = { { 0 } };
198 	int ret = 0;
199 
200 	if (size > UINT64_MAX - max_size_per_cmd ||
201 	    DIV_ROUND_UP(size, max_size_per_cmd) > max_commands)
202 		return -ENOMEM;
203 
204 	/* Map both buffers into the GPU address space so we can access them from the GPU. */
205 	va_args.handle = src_handle;
206 	va_args.operation = AMDGPU_VA_OP_MAP;
207 	va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_DELAY_UPDATE;
208 	va_args.va_address = src_addr;
209 	va_args.map_size = size;
210 
211 	ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
212 	if (ret)
213 		return ret;
214 
215 	va_args.handle = dst_handle;
216 	va_args.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_DELAY_UPDATE;
217 	va_args.va_address = dst_addr;
218 
219 	ret = drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
220 	if (ret)
221 		goto unmap_src;
222 
223 	while (remaining_size) {
224 		uint64_t cur_size = remaining_size;
225 		if (cur_size > max_size_per_cmd)
226 			cur_size = max_size_per_cmd;
227 
228 		priv->sdma_cmdbuf_map[cmd++] = 0x01; /* linear copy */
229 		priv->sdma_cmdbuf_map[cmd++] =
230 		    priv->dev_info.family >= AMDGPU_FAMILY_AI ? (cur_size - 1) : cur_size;
231 		priv->sdma_cmdbuf_map[cmd++] = 0;
232 		priv->sdma_cmdbuf_map[cmd++] = cur_src_addr;
233 		priv->sdma_cmdbuf_map[cmd++] = cur_src_addr >> 32;
234 		priv->sdma_cmdbuf_map[cmd++] = cur_dst_addr;
235 		priv->sdma_cmdbuf_map[cmd++] = cur_dst_addr >> 32;
236 
237 		remaining_size -= cur_size;
238 		cur_src_addr += cur_size;
239 		cur_dst_addr += cur_size;
240 	}
241 
242 	ib.va_start = priv->sdma_cmdbuf_addr;
243 	ib.ib_bytes = cmd * 4;
244 	ib.ip_type = AMDGPU_HW_IP_DMA;
245 
246 	chunks[1].chunk_id = AMDGPU_CHUNK_ID_IB;
247 	chunks[1].length_dw = sizeof(ib) / 4;
248 	chunks[1].chunk_data = (uintptr_t)&ib;
249 
250 	bo_list_entries[0].bo_handle = priv->sdma_cmdbuf_bo;
251 	bo_list_entries[0].bo_priority = 8; /* Middle of range, like RADV. */
252 	bo_list_entries[1].bo_handle = src_handle;
253 	bo_list_entries[1].bo_priority = 8;
254 	bo_list_entries[2].bo_handle = dst_handle;
255 	bo_list_entries[2].bo_priority = 8;
256 
257 	bo_list.bo_number = 3;
258 	bo_list.bo_info_size = sizeof(bo_list_entries[0]);
259 	bo_list.bo_info_ptr = (uintptr_t)bo_list_entries;
260 
261 	chunks[0].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
262 	chunks[0].length_dw = sizeof(bo_list) / 4;
263 	chunks[0].chunk_data = (uintptr_t)&bo_list;
264 
265 	chunk_ptrs[0] = (uintptr_t)&chunks[0];
266 	chunk_ptrs[1] = (uintptr_t)&chunks[1];
267 
268 	cs.in.ctx_id = priv->sdma_ctx;
269 	cs.in.num_chunks = 2;
270 	cs.in.chunks = (uintptr_t)chunk_ptrs;
271 
272 	ret = drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
273 	if (ret) {
274 		drv_log("SDMA copy command buffer submission failed %d\n", ret);
275 		goto unmap_dst;
276 	}
277 
278 	wait_cs.in.handle = cs.out.handle;
279 	wait_cs.in.ip_type = AMDGPU_HW_IP_DMA;
280 	wait_cs.in.ctx_id = priv->sdma_ctx;
281 	wait_cs.in.timeout = INT64_MAX;
282 
283 	ret = drmCommandWriteRead(fd, DRM_AMDGPU_WAIT_CS, &wait_cs, sizeof(wait_cs));
284 	if (ret) {
285 		drv_log("Could not wait for CS to finish\n");
286 	} else if (wait_cs.out.status) {
287 		drv_log("Infinite wait timed out, likely GPU hang.\n");
288 		ret = -ENODEV;
289 	}
290 
291 unmap_dst:
292 	va_args.handle = dst_handle;
293 	va_args.operation = AMDGPU_VA_OP_UNMAP;
294 	va_args.flags = AMDGPU_VM_DELAY_UPDATE;
295 	va_args.va_address = dst_addr;
296 	drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
297 
298 unmap_src:
299 	va_args.handle = src_handle;
300 	va_args.operation = AMDGPU_VA_OP_UNMAP;
301 	va_args.flags = AMDGPU_VM_DELAY_UPDATE;
302 	va_args.va_address = src_addr;
303 	drmCommandWrite(fd, DRM_AMDGPU_GEM_VA, &va_args, sizeof(va_args));
304 
305 	return ret;
306 }
307 
amdgpu_init(struct driver * drv)308 static int amdgpu_init(struct driver *drv)
309 {
310 	struct amdgpu_priv *priv;
311 	drmVersionPtr drm_version;
312 	struct format_metadata metadata;
313 	uint64_t use_flags = BO_USE_RENDER_MASK;
314 
315 	priv = calloc(1, sizeof(struct amdgpu_priv));
316 	if (!priv)
317 		return -ENOMEM;
318 
319 	drm_version = drmGetVersion(drv_get_fd(drv));
320 	if (!drm_version) {
321 		free(priv);
322 		return -ENODEV;
323 	}
324 
325 	priv->drm_version = drm_version->version_minor;
326 	drmFreeVersion(drm_version);
327 
328 	drv->priv = priv;
329 
330 	if (query_dev_info(drv_get_fd(drv), &priv->dev_info)) {
331 		free(priv);
332 		drv->priv = NULL;
333 		return -ENODEV;
334 	}
335 	if (dri_init(drv, DRI_PATH, "radeonsi")) {
336 		free(priv);
337 		drv->priv = NULL;
338 		return -ENODEV;
339 	}
340 
341 	/* Continue on failure, as we can still succesfully map things without SDMA. */
342 	if (sdma_init(priv, drv_get_fd(drv)))
343 		drv_log("SDMA init failed\n");
344 
345 	metadata.tiling = TILE_TYPE_LINEAR;
346 	metadata.priority = 1;
347 	metadata.modifier = DRM_FORMAT_MOD_LINEAR;
348 
349 	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
350 			     &metadata, use_flags);
351 
352 	drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
353 			     &metadata, BO_USE_TEXTURE_MASK);
354 
355 	/* NV12 format for camera, display, decoding and encoding. */
356 	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
357 			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
358 				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
359 				   BO_USE_PROTECTED);
360 
361 	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata,
362 			       BO_USE_SCANOUT | BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
363 				   BO_USE_PROTECTED);
364 
365 	/* Android CTS tests require this. */
366 	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata, BO_USE_SW_MASK);
367 
368 	/* Linear formats supported by display. */
369 	drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
370 	drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
371 	drv_modify_combination(drv, DRM_FORMAT_ABGR8888, &metadata, BO_USE_SCANOUT);
372 	drv_modify_combination(drv, DRM_FORMAT_XBGR8888, &metadata, BO_USE_SCANOUT);
373 
374 	drv_modify_combination(drv, DRM_FORMAT_ABGR2101010, &metadata, BO_USE_SCANOUT);
375 	drv_modify_combination(drv, DRM_FORMAT_ARGB2101010, &metadata, BO_USE_SCANOUT);
376 	drv_modify_combination(drv, DRM_FORMAT_XBGR2101010, &metadata, BO_USE_SCANOUT);
377 	drv_modify_combination(drv, DRM_FORMAT_XRGB2101010, &metadata, BO_USE_SCANOUT);
378 
379 	drv_modify_combination(drv, DRM_FORMAT_NV21, &metadata, BO_USE_SCANOUT);
380 
381 	/*
382 	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
383 	 * from camera and input/output from hardware decoder/encoder.
384 	 */
385 	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata,
386 			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
387 				   BO_USE_HW_VIDEO_ENCODER);
388 
389 	/*
390 	 * The following formats will be allocated by the DRI backend and may be potentially tiled.
391 	 * Since format modifier support hasn't been implemented fully yet, it's not
392 	 * possible to enumerate the different types of buffers (like i915 can).
393 	 */
394 	use_flags &= ~BO_USE_RENDERSCRIPT;
395 	use_flags &= ~BO_USE_SW_WRITE_OFTEN;
396 	use_flags &= ~BO_USE_SW_READ_OFTEN;
397 	use_flags &= ~BO_USE_LINEAR;
398 
399 	metadata.tiling = TILE_TYPE_DRI;
400 	metadata.priority = 2;
401 
402 	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
403 			     &metadata, use_flags);
404 
405 	/* Potentially tiled formats supported by display. */
406 	drv_modify_combination(drv, DRM_FORMAT_ARGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
407 	drv_modify_combination(drv, DRM_FORMAT_XRGB8888, &metadata, BO_USE_CURSOR | BO_USE_SCANOUT);
408 	drv_modify_combination(drv, DRM_FORMAT_ABGR8888, &metadata, BO_USE_SCANOUT);
409 	drv_modify_combination(drv, DRM_FORMAT_XBGR8888, &metadata, BO_USE_SCANOUT);
410 
411 	drv_modify_combination(drv, DRM_FORMAT_ABGR2101010, &metadata, BO_USE_SCANOUT);
412 	drv_modify_combination(drv, DRM_FORMAT_ARGB2101010, &metadata, BO_USE_SCANOUT);
413 	drv_modify_combination(drv, DRM_FORMAT_XBGR2101010, &metadata, BO_USE_SCANOUT);
414 	drv_modify_combination(drv, DRM_FORMAT_XRGB2101010, &metadata, BO_USE_SCANOUT);
415 	return 0;
416 }
417 
amdgpu_close(struct driver * drv)418 static void amdgpu_close(struct driver *drv)
419 {
420 	sdma_finish(drv->priv, drv_get_fd(drv));
421 	dri_close(drv);
422 	free(drv->priv);
423 	drv->priv = NULL;
424 }
425 
amdgpu_create_bo_linear(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,uint64_t use_flags)426 static int amdgpu_create_bo_linear(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
427 				   uint64_t use_flags)
428 {
429 	int ret;
430 	size_t num_planes;
431 	uint32_t plane, stride;
432 	union drm_amdgpu_gem_create gem_create = { { 0 } };
433 	struct amdgpu_priv *priv = bo->drv->priv;
434 
435 	stride = drv_stride_from_format(format, width, 0);
436 	num_planes = drv_num_planes_from_format(format);
437 
438 	/*
439 	 * For multiplane formats, align the stride to 512 to ensure that subsample strides are 256
440 	 * aligned. This uses more memory than necessary since the first plane only needs to be
441 	 * 256 aligned, but it's acceptable for a short-term fix. It's probably safe for other gpu
442 	 * families, but let's restrict it to Raven for now (b/171013552).
443 	 * */
444 	if (priv->dev_info.family == AMDGPU_FAMILY_RV && num_planes > 1)
445 		stride = ALIGN(stride, 512);
446 	else
447 		stride = ALIGN(stride, 256);
448 
449 	/*
450 	 * Currently, allocator used by chrome aligns the height for Encoder/
451 	 * Decoder buffers while allocator used by android(gralloc/minigbm)
452 	 * doesn't provide any aligment.
453 	 *
454 	 * See b/153130069
455 	 */
456 	if (use_flags & (BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER))
457 		height = ALIGN(height, CHROME_HEIGHT_ALIGN);
458 
459 	drv_bo_from_format(bo, stride, height, format);
460 
461 	gem_create.in.bo_size =
462 	    ALIGN(bo->meta.total_size, priv->dev_info.virtual_address_alignment);
463 	gem_create.in.alignment = 256;
464 	gem_create.in.domain_flags = 0;
465 
466 	if (use_flags & (BO_USE_LINEAR | BO_USE_SW_MASK))
467 		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
468 
469 	gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;
470 
471 	/* Scanout in GTT requires USWC, otherwise try to use cachable memory
472 	 * for buffers that are read often, because uncacheable reads can be
473 	 * very slow. USWC should be faster on the GPU though. */
474 	if ((use_flags & BO_USE_SCANOUT) || !(use_flags & BO_USE_SW_READ_OFTEN))
475 		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
476 
477 	/* For protected data Buffer needs to be allocated from TMZ */
478 	if (use_flags & BO_USE_PROTECTED)
479 		gem_create.in.domain_flags |= AMDGPU_GEM_CREATE_ENCRYPTED;
480 
481 	/* Allocate the buffer with the preferred heap. */
482 	ret = drmCommandWriteRead(drv_get_fd(bo->drv), DRM_AMDGPU_GEM_CREATE, &gem_create,
483 				  sizeof(gem_create));
484 	if (ret < 0)
485 		return ret;
486 
487 	for (plane = 0; plane < bo->meta.num_planes; plane++)
488 		bo->handles[plane].u32 = gem_create.out.handle;
489 
490 	bo->meta.format_modifier = DRM_FORMAT_MOD_LINEAR;
491 
492 	return 0;
493 }
494 
amdgpu_create_bo(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,uint64_t use_flags)495 static int amdgpu_create_bo(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
496 			    uint64_t use_flags)
497 {
498 	struct combination *combo;
499 	struct amdgpu_priv *priv = bo->drv->priv;
500 
501 	combo = drv_get_combination(bo->drv, format, use_flags);
502 	if (!combo)
503 		return -EINVAL;
504 
505 	if (combo->metadata.tiling == TILE_TYPE_DRI) {
506 		bool needs_alignment = false;
507 #ifdef __ANDROID__
508 		/*
509 		 * Currently, the gralloc API doesn't differentiate between allocation time and map
510 		 * time strides. A workaround for amdgpu DRI buffers is to always to align to 256 at
511 		 * allocation time.
512 		 *
513 		 * See b/115946221,b/117942643
514 		 */
515 		if (use_flags & (BO_USE_SW_MASK))
516 			needs_alignment = true;
517 #endif
518 		// See b/122049612
519 		if (use_flags & (BO_USE_SCANOUT) && priv->dev_info.family == AMDGPU_FAMILY_CZ)
520 			needs_alignment = true;
521 
522 		if (needs_alignment) {
523 			uint32_t bytes_per_pixel = drv_bytes_per_pixel_from_format(format, 0);
524 			width = ALIGN(width, 256 / bytes_per_pixel);
525 		}
526 
527 		return dri_bo_create(bo, width, height, format, use_flags);
528 	}
529 
530 	return amdgpu_create_bo_linear(bo, width, height, format, use_flags);
531 }
532 
amdgpu_create_bo_with_modifiers(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,const uint64_t * modifiers,uint32_t count)533 static int amdgpu_create_bo_with_modifiers(struct bo *bo, uint32_t width, uint32_t height,
534 					   uint32_t format, const uint64_t *modifiers,
535 					   uint32_t count)
536 {
537 	bool only_use_linear = true;
538 
539 	for (uint32_t i = 0; i < count; ++i)
540 		if (modifiers[i] != DRM_FORMAT_MOD_LINEAR)
541 			only_use_linear = false;
542 
543 	if (only_use_linear)
544 		return amdgpu_create_bo_linear(bo, width, height, format, BO_USE_SCANOUT);
545 
546 	return dri_bo_create_with_modifiers(bo, width, height, format, modifiers, count);
547 }
548 
amdgpu_import_bo(struct bo * bo,struct drv_import_fd_data * data)549 static int amdgpu_import_bo(struct bo *bo, struct drv_import_fd_data *data)
550 {
551 	bool dri_tiling = data->format_modifier != DRM_FORMAT_MOD_LINEAR;
552 	if (data->format_modifier == DRM_FORMAT_MOD_INVALID) {
553 		struct combination *combo;
554 		combo = drv_get_combination(bo->drv, data->format, data->use_flags);
555 		if (!combo)
556 			return -EINVAL;
557 
558 		dri_tiling = combo->metadata.tiling == TILE_TYPE_DRI;
559 	}
560 
561 	if (dri_tiling)
562 		return dri_bo_import(bo, data);
563 	else
564 		return drv_prime_bo_import(bo, data);
565 }
566 
amdgpu_destroy_bo(struct bo * bo)567 static int amdgpu_destroy_bo(struct bo *bo)
568 {
569 	if (bo->priv)
570 		return dri_bo_destroy(bo);
571 	else
572 		return drv_gem_bo_destroy(bo);
573 }
574 
amdgpu_map_bo(struct bo * bo,struct vma * vma,size_t plane,uint32_t map_flags)575 static void *amdgpu_map_bo(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
576 {
577 	void *addr = MAP_FAILED;
578 	int ret;
579 	union drm_amdgpu_gem_mmap gem_map = { { 0 } };
580 	struct drm_amdgpu_gem_create_in bo_info = { 0 };
581 	struct drm_amdgpu_gem_op gem_op = { 0 };
582 	uint32_t handle = bo->handles[plane].u32;
583 	struct amdgpu_linear_vma_priv *priv = NULL;
584 	struct amdgpu_priv *drv_priv;
585 
586 	if (bo->priv)
587 		return dri_bo_map(bo, vma, plane, map_flags);
588 
589 	drv_priv = bo->drv->priv;
590 	gem_op.handle = handle;
591 	gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO;
592 	gem_op.value = (uintptr_t)&bo_info;
593 
594 	ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_OP, &gem_op, sizeof(gem_op));
595 	if (ret)
596 		return MAP_FAILED;
597 
598 	vma->length = bo_info.bo_size;
599 
600 	if (((bo_info.domains & AMDGPU_GEM_DOMAIN_VRAM) ||
601 	     (bo_info.domain_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)) &&
602 	    drv_priv->sdma_cmdbuf_map) {
603 		union drm_amdgpu_gem_create gem_create = { { 0 } };
604 
605 		priv = calloc(1, sizeof(struct amdgpu_linear_vma_priv));
606 		if (!priv)
607 			return MAP_FAILED;
608 
609 		gem_create.in.bo_size = bo_info.bo_size;
610 		gem_create.in.alignment = 4096;
611 		gem_create.in.domains = AMDGPU_GEM_DOMAIN_GTT;
612 
613 		ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_CREATE, &gem_create,
614 					  sizeof(gem_create));
615 		if (ret < 0) {
616 			drv_log("GEM create failed\n");
617 			free(priv);
618 			return MAP_FAILED;
619 		}
620 
621 		priv->map_flags = map_flags;
622 		handle = priv->handle = gem_create.out.handle;
623 
624 		ret = sdma_copy(bo->drv->priv, bo->drv->fd, bo->handles[0].u32, priv->handle,
625 				bo_info.bo_size);
626 		if (ret) {
627 			drv_log("SDMA copy for read failed\n");
628 			goto fail;
629 		}
630 	}
631 
632 	gem_map.in.handle = handle;
633 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_map);
634 	if (ret) {
635 		drv_log("DRM_IOCTL_AMDGPU_GEM_MMAP failed\n");
636 		goto fail;
637 	}
638 
639 	addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
640 		    gem_map.out.addr_ptr);
641 	if (addr == MAP_FAILED)
642 		goto fail;
643 
644 	vma->priv = priv;
645 	return addr;
646 
647 fail:
648 	if (priv) {
649 		struct drm_gem_close gem_close = { 0 };
650 		gem_close.handle = priv->handle;
651 		drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
652 		free(priv);
653 	}
654 	return MAP_FAILED;
655 }
656 
amdgpu_unmap_bo(struct bo * bo,struct vma * vma)657 static int amdgpu_unmap_bo(struct bo *bo, struct vma *vma)
658 {
659 	if (bo->priv) {
660 		return dri_bo_unmap(bo, vma);
661 	} else {
662 		int r = munmap(vma->addr, vma->length);
663 		if (r)
664 			return r;
665 
666 		if (vma->priv) {
667 			struct amdgpu_linear_vma_priv *priv = vma->priv;
668 			struct drm_gem_close gem_close = { 0 };
669 
670 			if (BO_MAP_WRITE & priv->map_flags) {
671 				r = sdma_copy(bo->drv->priv, bo->drv->fd, priv->handle,
672 					      bo->handles[0].u32, vma->length);
673 				if (r)
674 					return r;
675 			}
676 
677 			gem_close.handle = priv->handle;
678 			r = drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
679 		}
680 
681 		return 0;
682 	}
683 }
684 
amdgpu_bo_invalidate(struct bo * bo,struct mapping * mapping)685 static int amdgpu_bo_invalidate(struct bo *bo, struct mapping *mapping)
686 {
687 	int ret;
688 	union drm_amdgpu_gem_wait_idle wait_idle = { { 0 } };
689 
690 	if (bo->priv)
691 		return 0;
692 
693 	wait_idle.in.handle = bo->handles[0].u32;
694 	wait_idle.in.timeout = AMDGPU_TIMEOUT_INFINITE;
695 
696 	ret = drmCommandWriteRead(bo->drv->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &wait_idle,
697 				  sizeof(wait_idle));
698 
699 	if (ret < 0) {
700 		drv_log("DRM_AMDGPU_GEM_WAIT_IDLE failed with %d\n", ret);
701 		return ret;
702 	}
703 
704 	if (ret == 0 && wait_idle.out.status)
705 		drv_log("DRM_AMDGPU_GEM_WAIT_IDLE BO is busy\n");
706 
707 	return 0;
708 }
709 
710 const struct backend backend_amdgpu = {
711 	.name = "amdgpu",
712 	.init = amdgpu_init,
713 	.close = amdgpu_close,
714 	.bo_create = amdgpu_create_bo,
715 	.bo_create_with_modifiers = amdgpu_create_bo_with_modifiers,
716 	.bo_destroy = amdgpu_destroy_bo,
717 	.bo_import = amdgpu_import_bo,
718 	.bo_map = amdgpu_map_bo,
719 	.bo_unmap = amdgpu_unmap_bo,
720 	.bo_invalidate = amdgpu_bo_invalidate,
721 	.resolve_format = drv_resolve_format_helper,
722 	.num_planes_from_modifier = dri_num_planes_from_modifier,
723 };
724 
725 #endif
726