• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifdef DRV_MSM
8 
9 #include <assert.h>
10 #include <dlfcn.h>
11 #include <drm_fourcc.h>
12 #include <errno.h>
13 #include <inttypes.h>
14 #include <msm_drm.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/mman.h>
19 #include <xf86drm.h>
20 
21 #include "drv_priv.h"
22 #include "helpers.h"
23 #include "util.h"
24 
/* Alignment values are based on SDM845 Gfx IP */
#define DEFAULT_ALIGNMENT 64
#define BUFFER_SIZE_ALIGN 4096

#define VENUS_STRIDE_ALIGN 128
#define VENUS_SCANLINE_ALIGN 16
#define NV12_LINEAR_PADDING (12 * 1024)
/*
 * Extra padding appended to UBWC NV12 buffers: the larger of 16 KiB and
 * 48 times the Y stride.  The argument is parenthesized so that passing an
 * expression (e.g. a + b) still multiplies the whole value by 48.
 */
#define NV12_UBWC_PADDING(y_stride) (MAX(16 * 1024, (y_stride) * 48))
#define MACROTILE_WIDTH_ALIGN 64
#define MACROTILE_HEIGHT_ALIGN 16
#define PLANE_SIZE_ALIGN 4096

/* Value stored in bo->meta.tiling for UBWC (compressed) buffers. */
#define MSM_UBWC_TILING 1
38 
39 static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
40 						  DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
41 						  DRM_FORMAT_XRGB8888 };
42 
43 static const uint32_t texture_source_formats[] = { DRM_FORMAT_NV12, DRM_FORMAT_R8,
44 						   DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };
45 
46 /*
47  * Each macrotile consists of m x n (mostly 4 x 4) tiles.
48  * Pixel data pitch/stride is aligned with macrotile width.
49  * Pixel data height is aligned with macrotile height.
50  * Entire pixel data buffer is aligned with 4k(bytes).
51  */
get_ubwc_meta_size(uint32_t width,uint32_t height,uint32_t tile_width,uint32_t tile_height)52 static uint32_t get_ubwc_meta_size(uint32_t width, uint32_t height, uint32_t tile_width,
53 				   uint32_t tile_height)
54 {
55 	uint32_t macrotile_width, macrotile_height;
56 
57 	macrotile_width = DIV_ROUND_UP(width, tile_width);
58 	macrotile_height = DIV_ROUND_UP(height, tile_height);
59 
60 	// Align meta buffer width to 64 blocks
61 	macrotile_width = ALIGN(macrotile_width, MACROTILE_WIDTH_ALIGN);
62 
63 	// Align meta buffer height to 16 blocks
64 	macrotile_height = ALIGN(macrotile_height, MACROTILE_HEIGHT_ALIGN);
65 
66 	return ALIGN(macrotile_width * macrotile_height, PLANE_SIZE_ALIGN);
67 }
68 
get_pitch_alignment(struct bo * bo)69 static unsigned get_pitch_alignment(struct bo *bo)
70 {
71 	switch (bo->meta.format) {
72 	case DRM_FORMAT_NV12:
73 		return VENUS_STRIDE_ALIGN;
74 	case DRM_FORMAT_YVU420:
75 	case DRM_FORMAT_YVU420_ANDROID:
76 		/* TODO other YUV formats? */
77 		/* Something (in the video stack?) assumes the U/V planes can use
78 		 * half the pitch as the Y plane.. to componsate, double the
79 		 * alignment:
80 		 */
81 		return 2 * DEFAULT_ALIGNMENT;
82 	default:
83 		return DEFAULT_ALIGNMENT;
84 	}
85 }
86 
msm_calculate_layout(struct bo * bo)87 static void msm_calculate_layout(struct bo *bo)
88 {
89 	uint32_t width, height;
90 
91 	width = bo->meta.width;
92 	height = bo->meta.height;
93 
94 	/* NV12 format requires extra padding with platform
95 	 * specific alignments for venus driver
96 	 */
97 	if (bo->meta.format == DRM_FORMAT_NV12) {
98 		uint32_t y_stride, uv_stride, y_scanline, uv_scanline, y_plane, uv_plane, size,
99 		    extra_padding;
100 
101 		y_stride = ALIGN(width, VENUS_STRIDE_ALIGN);
102 		uv_stride = ALIGN(width, VENUS_STRIDE_ALIGN);
103 		y_scanline = ALIGN(height, VENUS_SCANLINE_ALIGN * 2);
104 		uv_scanline = ALIGN(DIV_ROUND_UP(height, 2),
105 				    VENUS_SCANLINE_ALIGN * (bo->meta.tiling ? 2 : 1));
106 		y_plane = y_stride * y_scanline;
107 		uv_plane = uv_stride * uv_scanline;
108 
109 		if (bo->meta.tiling == MSM_UBWC_TILING) {
110 			y_plane = ALIGN(y_plane, PLANE_SIZE_ALIGN);
111 			uv_plane = ALIGN(uv_plane, PLANE_SIZE_ALIGN);
112 			y_plane += get_ubwc_meta_size(width, height, 32, 8);
113 			uv_plane += get_ubwc_meta_size(width >> 1, height >> 1, 16, 8);
114 			extra_padding = NV12_UBWC_PADDING(y_stride);
115 		} else {
116 			extra_padding = NV12_LINEAR_PADDING;
117 		}
118 
119 		bo->meta.strides[0] = y_stride;
120 		bo->meta.sizes[0] = y_plane;
121 		bo->meta.offsets[1] = y_plane;
122 		bo->meta.strides[1] = uv_stride;
123 		size = y_plane + uv_plane + extra_padding;
124 		bo->meta.total_size = ALIGN(size, BUFFER_SIZE_ALIGN);
125 		bo->meta.sizes[1] = bo->meta.total_size - bo->meta.sizes[0];
126 	} else {
127 		uint32_t stride, alignw, alignh;
128 
129 		alignw = ALIGN(width, get_pitch_alignment(bo));
130 		/* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned.
131 			DRM_FORMAT_R8 of height one is used for JPEG camera output, so don't
132 			height align that. */
133 		if (bo->meta.format == DRM_FORMAT_YVU420_ANDROID ||
134 		    (bo->meta.format == DRM_FORMAT_R8 && height == 1)) {
135 			alignh = height;
136 		} else {
137 			alignh = ALIGN(height, DEFAULT_ALIGNMENT);
138 		}
139 
140 		stride = drv_stride_from_format(bo->meta.format, alignw, 0);
141 
142 		/* Calculate size and assign stride, size, offset to each plane based on format */
143 		drv_bo_from_format(bo, stride, alignh, bo->meta.format);
144 
145 		/* For all RGB UBWC formats */
146 		if (bo->meta.tiling == MSM_UBWC_TILING) {
147 			bo->meta.sizes[0] += get_ubwc_meta_size(width, height, 16, 4);
148 			bo->meta.total_size = bo->meta.sizes[0];
149 			assert(IS_ALIGNED(bo->meta.total_size, BUFFER_SIZE_ALIGN));
150 		}
151 	}
152 }
153 
is_ubwc_fmt(uint32_t format)154 static bool is_ubwc_fmt(uint32_t format)
155 {
156 	switch (format) {
157 	case DRM_FORMAT_XBGR8888:
158 	case DRM_FORMAT_ABGR8888:
159 	case DRM_FORMAT_XRGB8888:
160 	case DRM_FORMAT_ARGB8888:
161 	case DRM_FORMAT_NV12:
162 		return 1;
163 	default:
164 		return 0;
165 	}
166 }
167 
msm_add_ubwc_combinations(struct driver * drv,const uint32_t * formats,uint32_t num_formats,struct format_metadata * metadata,uint64_t use_flags)168 static void msm_add_ubwc_combinations(struct driver *drv, const uint32_t *formats,
169 				      uint32_t num_formats, struct format_metadata *metadata,
170 				      uint64_t use_flags)
171 {
172 	for (uint32_t i = 0; i < num_formats; i++) {
173 		if (is_ubwc_fmt(formats[i])) {
174 			struct combination combo = { .format = formats[i],
175 						     .metadata = *metadata,
176 						     .use_flags = use_flags };
177 			drv_array_append(drv->combos, &combo);
178 		}
179 	}
180 }
181 
182 /**
183  * Check for buggy apps that are known to not support modifiers, to avoid surprising them
184  * with a UBWC buffer.
185  */
should_avoid_ubwc(void)186 static bool should_avoid_ubwc(void)
187 {
188 #ifndef __ANDROID__
189 	/* waffle is buggy and, requests a renderable buffer (which on qcom platforms, we
190 	 * want to use UBWC), and then passes it to the kernel discarding the modifier.
191 	 * So mesa ends up correctly rendering to as tiled+compressed, but kernel tries
192 	 * to display as linear.  Other platforms do not see this issue, simply because
193 	 * they only use compressed (ex, AFBC) with the BO_USE_SCANOUT flag.
194 	 *
195 	 * See b/163137550
196 	 */
197 	if (dlsym(RTLD_DEFAULT, "waffle_display_connect")) {
198 		drv_log("WARNING: waffle detected, disabling UBWC\n");
199 		return true;
200 	}
201 
202 	/* The video_decode_accelerator_tests needs to read back the frames
203 	 * to verify they are correct.  The frame verification relies on
204 	 * computing the MD5 of the video frame.  UBWC results in a different
205 	 * MD5.  This turns off UBWC for gtest until a proper frame
206 	 * comparison can be made
207 	 * Rely on the same mechanism that waffle is using, but this time check
208 	 * for a dynamic library function that is present in chrome, but missing
209 	 * in gtest.  Cups is not loaded for video tests.
210 	 *
211 	 * See b/171260705
212 	 */
213 	if (!dlsym(RTLD_DEFAULT, "cupsFilePrintf")) {
214 		drv_log("WARNING: gtest detected, disabling UBWC\n");
215 		return true;
216 	}
217 #endif
218 	return false;
219 }
220 
msm_init(struct driver * drv)221 static int msm_init(struct driver *drv)
222 {
223 	struct format_metadata metadata;
224 	uint64_t render_use_flags = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
225 	uint64_t texture_use_flags = BO_USE_TEXTURE_MASK | BO_USE_HW_VIDEO_DECODER;
226 	/*
227 	 * NOTE: we actually could use tiled in the BO_USE_FRONT_RENDERING case,
228 	 * if we had a modifier for tiled-but-not-compressed.  But we *cannot* use
229 	 * compressed in this case because the UBWC flags/meta data can be out of
230 	 * sync with pixel data while the GPU is writing a frame out to memory.
231 	 */
232 	uint64_t sw_flags =
233 	    (BO_USE_RENDERSCRIPT | BO_USE_SW_MASK | BO_USE_LINEAR | BO_USE_FRONT_RENDERING);
234 
235 	drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
236 			     &LINEAR_METADATA, render_use_flags);
237 
238 	drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
239 			     &LINEAR_METADATA, texture_use_flags);
240 
241 	/* The camera stack standardizes on NV12 for YUV buffers. */
242 	/* YVU420 and NV12 formats for camera, display and encoding. */
243 	drv_modify_combination(drv, DRM_FORMAT_NV12, &LINEAR_METADATA,
244 			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
245 				   BO_USE_HW_VIDEO_ENCODER);
246 
247 	/*
248 	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
249 	 * from camera and input/output from hardware decoder/encoder.
250 	 */
251 	drv_modify_combination(drv, DRM_FORMAT_R8, &LINEAR_METADATA,
252 			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
253 				   BO_USE_HW_VIDEO_ENCODER);
254 
255 	/* Android CTS tests require this. */
256 	drv_add_combination(drv, DRM_FORMAT_BGR888, &LINEAR_METADATA, BO_USE_SW_MASK);
257 
258 	drv_modify_linear_combinations(drv);
259 
260 	if (should_avoid_ubwc() || !drv->compression)
261 		return 0;
262 
263 	metadata.tiling = MSM_UBWC_TILING;
264 	metadata.priority = 2;
265 	metadata.modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
266 
267 	render_use_flags &= ~sw_flags;
268 	texture_use_flags &= ~sw_flags;
269 
270 	msm_add_ubwc_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
271 				  &metadata, render_use_flags);
272 
273 	msm_add_ubwc_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
274 				  &metadata, texture_use_flags);
275 
276 	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
277 			       BO_USE_SCANOUT | BO_USE_HW_VIDEO_ENCODER);
278 
279 	return 0;
280 }
281 
msm_bo_create_for_modifier(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,const uint64_t modifier)282 static int msm_bo_create_for_modifier(struct bo *bo, uint32_t width, uint32_t height,
283 				      uint32_t format, const uint64_t modifier)
284 {
285 	struct drm_msm_gem_new req = { 0 };
286 	int ret;
287 	size_t i;
288 
289 	bo->meta.tiling = (modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) ? MSM_UBWC_TILING : 0;
290 	msm_calculate_layout(bo);
291 
292 	req.flags = MSM_BO_WC | MSM_BO_SCANOUT;
293 	req.size = bo->meta.total_size;
294 
295 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_NEW, &req);
296 	if (ret) {
297 		drv_log("DRM_IOCTL_MSM_GEM_NEW failed with %s\n", strerror(errno));
298 		return -errno;
299 	}
300 
301 	/*
302 	 * Though we use only one plane, we need to set handle for
303 	 * all planes to pass kernel checks
304 	 */
305 	for (i = 0; i < bo->meta.num_planes; i++)
306 		bo->handles[i].u32 = req.handle;
307 
308 	bo->meta.format_modifier = modifier;
309 	return 0;
310 }
311 
msm_bo_create_with_modifiers(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,const uint64_t * modifiers,uint32_t count)312 static int msm_bo_create_with_modifiers(struct bo *bo, uint32_t width, uint32_t height,
313 					uint32_t format, const uint64_t *modifiers, uint32_t count)
314 {
315 	static const uint64_t modifier_order[] = {
316 		DRM_FORMAT_MOD_QCOM_COMPRESSED,
317 		DRM_FORMAT_MOD_LINEAR,
318 	};
319 
320 	uint64_t modifier =
321 	    drv_pick_modifier(modifiers, count, modifier_order, ARRAY_SIZE(modifier_order));
322 
323 	if (!bo->drv->compression && modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED)
324 		modifier = DRM_FORMAT_MOD_LINEAR;
325 
326 	return msm_bo_create_for_modifier(bo, width, height, format, modifier);
327 }
328 
329 /* msm_bo_create will create linear buffers for now */
msm_bo_create(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,uint64_t flags)330 static int msm_bo_create(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
331 			 uint64_t flags)
332 {
333 	struct combination *combo = drv_get_combination(bo->drv, format, flags);
334 
335 	if (!combo) {
336 		drv_log("invalid format = %d, flags = %" PRIx64 " combination\n", format, flags);
337 		return -EINVAL;
338 	}
339 
340 	return msm_bo_create_for_modifier(bo, width, height, format, combo->metadata.modifier);
341 }
342 
msm_bo_map(struct bo * bo,struct vma * vma,size_t plane,uint32_t map_flags)343 static void *msm_bo_map(struct bo *bo, struct vma *vma, size_t plane, uint32_t map_flags)
344 {
345 	int ret;
346 	struct drm_msm_gem_info req = { 0 };
347 
348 	req.handle = bo->handles[0].u32;
349 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_INFO, &req);
350 	if (ret) {
351 		drv_log("DRM_IOCLT_MSM_GEM_INFO failed with %s\n", strerror(errno));
352 		return MAP_FAILED;
353 	}
354 	vma->length = bo->meta.total_size;
355 
356 	return mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
357 		    req.offset);
358 }
359 
360 const struct backend backend_msm = {
361 	.name = "msm",
362 	.init = msm_init,
363 	.bo_create = msm_bo_create,
364 	.bo_create_with_modifiers = msm_bo_create_with_modifiers,
365 	.bo_destroy = drv_gem_bo_destroy,
366 	.bo_import = drv_prime_bo_import,
367 	.bo_map = msm_bo_map,
368 	.bo_unmap = drv_bo_munmap,
369 	.resolve_format = drv_resolve_format_helper,
370 };
371 #endif /* DRV_MSM */
372