1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 Broadcom
4  */
5 
6 /**
7  * DOC: VC4 plane module
8  *
9  * Each DRM plane is a layer of pixels being scanned out by the HVS.
10  *
11  * At atomic modeset check time, we compute the HVS display element
12  * state that would be necessary for displaying the plane (giving us a
13  * chance to figure out if a plane configuration is invalid), then at
14  * atomic flush time the CRTC will ask us to write our element state
15  * into the region of the HVS that it has allocated for us.
16  */
17 
18 #include <drm/drm_atomic.h>
19 #include <drm/drm_atomic_helper.h>
20 #include <drm/drm_atomic_uapi.h>
21 #include <drm/drm_fb_cma_helper.h>
22 #include <drm/drm_fourcc.h>
23 #include <drm/drm_gem_atomic_helper.h>
24 #include <drm/drm_plane_helper.h>
25 
26 #include "uapi/drm/vc4_drm.h"
27 
28 #include "vc4_drv.h"
29 #include "vc4_regs.h"
30 
31 static const struct hvs_format {
32 	u32 drm; /* DRM_FORMAT_* */
33 	u32 hvs; /* HVS_FORMAT_* */
34 	u32 pixel_order;
35 	u32 pixel_order_hvs5;
36 } hvs_formats[] = {
37 	{
38 		.drm = DRM_FORMAT_XRGB8888,
39 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
40 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
41 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
42 	},
43 	{
44 		.drm = DRM_FORMAT_ARGB8888,
45 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
46 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
47 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
48 	},
49 	{
50 		.drm = DRM_FORMAT_ABGR8888,
51 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
52 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
53 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
54 	},
55 	{
56 		.drm = DRM_FORMAT_XBGR8888,
57 		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
58 		.pixel_order = HVS_PIXEL_ORDER_ARGB,
59 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
60 	},
61 	{
62 		.drm = DRM_FORMAT_RGB565,
63 		.hvs = HVS_PIXEL_FORMAT_RGB565,
64 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
65 	},
66 	{
67 		.drm = DRM_FORMAT_BGR565,
68 		.hvs = HVS_PIXEL_FORMAT_RGB565,
69 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
70 	},
71 	{
72 		.drm = DRM_FORMAT_ARGB1555,
73 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
74 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
75 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
76 	},
77 	{
78 		.drm = DRM_FORMAT_XRGB1555,
79 		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
80 		.pixel_order = HVS_PIXEL_ORDER_ABGR,
81 		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
82 	},
83 	{
84 		.drm = DRM_FORMAT_RGB888,
85 		.hvs = HVS_PIXEL_FORMAT_RGB888,
86 		.pixel_order = HVS_PIXEL_ORDER_XRGB,
87 	},
88 	{
89 		.drm = DRM_FORMAT_BGR888,
90 		.hvs = HVS_PIXEL_FORMAT_RGB888,
91 		.pixel_order = HVS_PIXEL_ORDER_XBGR,
92 	},
93 	{
94 		.drm = DRM_FORMAT_YUV422,
95 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
96 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
97 	},
98 	{
99 		.drm = DRM_FORMAT_YVU422,
100 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
101 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
102 	},
103 	{
104 		.drm = DRM_FORMAT_YUV420,
105 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
106 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
107 	},
108 	{
109 		.drm = DRM_FORMAT_YVU420,
110 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
111 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
112 	},
113 	{
114 		.drm = DRM_FORMAT_NV12,
115 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
116 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
117 	},
118 	{
119 		.drm = DRM_FORMAT_NV21,
120 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
121 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
122 	},
123 	{
124 		.drm = DRM_FORMAT_NV16,
125 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
126 		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
127 	},
128 	{
129 		.drm = DRM_FORMAT_NV61,
130 		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
131 		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
132 	},
133 };
134 
135 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
136 {
137 	unsigned i;
138 
139 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
140 		if (hvs_formats[i].drm == drm_format)
141 			return &hvs_formats[i];
142 	}
143 
144 	return NULL;
145 }
146 
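/*
 * The 3 * dst >= 2 * src test below means any upscale, and any downscale
 * down to a 2/3 ratio, uses VC4_SCALING_PPF (polyphase filtering), while
 * stronger downscaling falls back to VC4_SCALING_TPZ (the trapezoidal
 * scaler).  For example, src = 1920 and dst = 1280 satisfies
 * 3 * 1280 >= 2 * 1920 and picks PPF; src = 1920 and dst = 960 does not,
 * so it picks TPZ.
 */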
147 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
148 {
149 	if (dst == src)
150 		return VC4_SCALING_NONE;
151 	if (3 * dst >= 2 * src)
152 		return VC4_SCALING_PPF;
153 	else
154 		return VC4_SCALING_TPZ;
155 }
156 
157 static bool plane_enabled(struct drm_plane_state *state)
158 {
159 	return state->fb && !WARN_ON(!state->crtc);
160 }
161 
162 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
163 {
164 	struct vc4_plane_state *vc4_state;
165 
166 	if (WARN_ON(!plane->state))
167 		return NULL;
168 
169 	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
170 	if (!vc4_state)
171 		return NULL;
172 
173 	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
174 	vc4_state->dlist_initialized = 0;
175 
176 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
177 
178 	if (vc4_state->dlist) {
179 		vc4_state->dlist = kmemdup(vc4_state->dlist,
180 					   vc4_state->dlist_count * 4,
181 					   GFP_KERNEL);
182 		if (!vc4_state->dlist) {
183 			kfree(vc4_state);
184 			return NULL;
185 		}
186 		vc4_state->dlist_size = vc4_state->dlist_count;
187 	}
188 
189 	return &vc4_state->base;
190 }
191 
192 static void vc4_plane_destroy_state(struct drm_plane *plane,
193 				    struct drm_plane_state *state)
194 {
195 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
196 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
197 
198 	if (drm_mm_node_allocated(&vc4_state->lbm)) {
199 		unsigned long irqflags;
200 
201 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
202 		drm_mm_remove_node(&vc4_state->lbm);
203 		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
204 	}
205 
206 	kfree(vc4_state->dlist);
207 	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
208 	kfree(state);
209 }
210 
211 /* Called during init to allocate the plane's atomic state. */
212 static void vc4_plane_reset(struct drm_plane *plane)
213 {
214 	struct vc4_plane_state *vc4_state;
215 
216 	WARN_ON(plane->state);
217 
218 	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
219 	if (!vc4_state)
220 		return;
221 
222 	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
223 }
224 
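/*
 * The dlist is kept on the CPU side as a plain array of 32-bit HVS words:
 * it starts at four entries, doubles whenever it fills up, and is only
 * copied into HVS memory later by vc4_plane_write_dlist().
 */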
225 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
226 {
227 	if (vc4_state->dlist_count == vc4_state->dlist_size) {
228 		u32 new_size = max(4u, vc4_state->dlist_count * 2);
229 		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);
230 
231 		if (!new_dlist)
232 			return;
233 		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);
234 
235 		kfree(vc4_state->dlist);
236 		vc4_state->dlist = new_dlist;
237 		vc4_state->dlist_size = new_size;
238 	}
239 
240 	vc4_state->dlist_count++;
241 }
242 
243 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
244 {
245 	unsigned int idx = vc4_state->dlist_count;
246 
247 	vc4_dlist_counter_increment(vc4_state);
248 	vc4_state->dlist[idx] = val;
249 }
250 
251 /* Returns the scl0/scl1 field based on whether the dimensions need to
252  * be up/down/non-scaled.
253  *
254  * This is a replication of a table from the spec.
255  */
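/* The switch key packs the horizontal scaling mode into bits [3:2] and the
 * vertical mode into bits [1:0], so each (H, V) combination maps onto
 * exactly one case label.
 */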
256 static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
257 {
258 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
259 
260 	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
261 	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
262 		return SCALER_CTL0_SCL_H_PPF_V_PPF;
263 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
264 		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
265 	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
266 		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
267 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
268 		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
269 	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
270 		return SCALER_CTL0_SCL_H_PPF_V_NONE;
271 	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
272 		return SCALER_CTL0_SCL_H_NONE_V_PPF;
273 	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
274 		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
275 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
276 		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
277 	default:
278 	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
279 		/* The unity case is independently handled by
280 		 * SCALER_CTL0_UNITY.
281 		 */
282 		return 0;
283 	}
284 }
285 
286 static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
287 {
288 	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
289 	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
290 	struct drm_crtc_state *crtc_state;
291 
292 	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
293 						   pstate->crtc);
294 
295 	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
296 	if (!left && !right && !top && !bottom)
297 		return 0;
298 
299 	if (left + right >= crtc_state->mode.hdisplay ||
300 	    top + bottom >= crtc_state->mode.vdisplay)
301 		return -EINVAL;
302 
303 	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
304 	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
305 					       adjhdisplay,
306 					       crtc_state->mode.hdisplay);
307 	vc4_pstate->crtc_x += left;
308 	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
309 		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;
310 
311 	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
312 	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
313 					       adjvdisplay,
314 					       crtc_state->mode.vdisplay);
315 	vc4_pstate->crtc_y += top;
316 	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
317 		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;
318 
319 	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
320 					       adjhdisplay,
321 					       crtc_state->mode.hdisplay);
322 	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
323 					       adjvdisplay,
324 					       crtc_state->mode.vdisplay);
325 
326 	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
327 		return -EINVAL;
328 
329 	return 0;
330 }
331 
332 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
333 {
334 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
335 	struct drm_framebuffer *fb = state->fb;
336 	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
337 	int num_planes = fb->format->num_planes;
338 	struct drm_crtc_state *crtc_state;
339 	u32 h_subsample = fb->format->hsub;
340 	u32 v_subsample = fb->format->vsub;
341 	int i, ret;
342 
343 	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
344 							state->crtc);
345 	if (!crtc_state) {
346 		DRM_DEBUG_KMS("Invalid crtc state\n");
347 		return -EINVAL;
348 	}
349 
350 	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
351 						  INT_MAX, true, true);
352 	if (ret)
353 		return ret;
354 
355 	for (i = 0; i < num_planes; i++)
356 		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
357 
358 	/*
359 	 * We don't support subpixel source positioning for scaling,
360 	 * but fractional coordinates can be generated by clipping,
361 	 * so just round for now.
362 	 */
363 	vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
364 	vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
365 	vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
366 	vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;
367 
368 	vc4_state->crtc_x = state->dst.x1;
369 	vc4_state->crtc_y = state->dst.y1;
370 	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
371 	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;
372 
373 	ret = vc4_plane_margins_adj(state);
374 	if (ret)
375 		return ret;
376 
377 	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
378 						       vc4_state->crtc_w);
379 	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
380 						       vc4_state->crtc_h);
381 
382 	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
383 			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);
384 
385 	if (num_planes > 1) {
386 		vc4_state->is_yuv = true;
387 
388 		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
389 		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
390 
391 		vc4_state->x_scaling[1] =
392 			vc4_get_scaling_mode(vc4_state->src_w[1],
393 					     vc4_state->crtc_w);
394 		vc4_state->y_scaling[1] =
395 			vc4_get_scaling_mode(vc4_state->src_h[1],
396 					     vc4_state->crtc_h);
397 
398 		/* YUV conversion requires that horizontal scaling be enabled
399 		 * on the UV plane even if vc4_get_scaling_mode() returned
400 		 * VC4_SCALING_NONE (which can happen when the down-scaling
401 		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
402 		 * case.
403 		 */
404 		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
405 			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
406 	} else {
407 		vc4_state->is_yuv = false;
408 		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
409 		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
410 	}
411 
412 	return 0;
413 }
414 
415 static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
416 {
417 	u32 scale, recip;
418 
419 	scale = (1 << 16) * src / dst;
420 
421 	/* The specs note that while the reciprocal would be defined
422 	 * as (1<<32)/scale, ~0 is close enough.
423 	 */
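	/* Worked example: a 2:1 downscale (src == 2 * dst) gives
	 * scale == 0x20000, so recip == 0xffffffff / 0x20000 == 32767,
	 * versus the exact (1 << 32) / 0x20000 == 32768.
	 */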
424 	recip = ~0 / scale;
425 
426 	vc4_dlist_write(vc4_state,
427 			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
428 			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
429 	vc4_dlist_write(vc4_state,
430 			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
431 }
432 
433 static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
434 {
435 	u32 scale = (1 << 16) * src / dst;
436 
437 	vc4_dlist_write(vc4_state,
438 			SCALER_PPF_AGC |
439 			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
440 			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
441 }
442 
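/*
 * A sketch of the sizing below: LBM is the HVS's temporary line storage,
 * needed only when vertical scaling is in use, and vc4_plane_allocate_lbm()
 * later carves this amount out of the HVS LBM pool.  For example, a
 * 1920-pixel-wide RGB source with PPF vertical scaling on HVS4 gives
 * 1920 * 16 = 30720, already a multiple of 64, divided by 2 for a return
 * value of 15360.
 */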
443 static u32 vc4_lbm_size(struct drm_plane_state *state)
444 {
445 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
446 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
447 	u32 pix_per_line;
448 	u32 lbm;
449 
450 	/* LBM is not needed when there's no vertical scaling. */
451 	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
452 	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
453 		return 0;
454 
455 	/*
456 	 * This can be further optimized in the RGB/YUV444 case if the PPF
457 	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
458 	 *
459 	 * It's not an issue though, since in that case src_w[0] is going
460 	 * to be greater than or equal to crtc_w.
461 	 */
462 	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
463 		pix_per_line = vc4_state->crtc_w;
464 	else
465 		pix_per_line = vc4_state->src_w[0];
466 
467 	if (!vc4_state->is_yuv) {
468 		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
469 			lbm = pix_per_line * 8;
470 		else {
471 			/* In special cases, this multiplier might be 12. */
472 			lbm = pix_per_line * 16;
473 		}
474 	} else {
475 		/* There are cases for this going down to a multiplier
476 		 * of 2, but according to the firmware source, the
477 		 * table in the docs is somewhat wrong.
478 		 */
479 		lbm = pix_per_line * 16;
480 	}
481 
482 	/* Align it to 64 or 128 (hvs5) bytes */
483 	lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);
484 
485 	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
486 	lbm /= vc4->hvs->hvs5 ? 4 : 2;
487 
488 	return lbm;
489 }
490 
491 static void vc4_write_scaling_parameters(struct drm_plane_state *state,
492 					 int channel)
493 {
494 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
495 
496 	/* Ch0 H-PPF Word 0: Scaling Parameters */
497 	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
498 		vc4_write_ppf(vc4_state,
499 			      vc4_state->src_w[channel], vc4_state->crtc_w);
500 	}
501 
502 	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
503 	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
504 		vc4_write_ppf(vc4_state,
505 			      vc4_state->src_h[channel], vc4_state->crtc_h);
506 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
507 	}
508 
509 	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
510 	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
511 		vc4_write_tpz(vc4_state,
512 			      vc4_state->src_w[channel], vc4_state->crtc_w);
513 	}
514 
515 	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
516 	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
517 		vc4_write_tpz(vc4_state,
518 			      vc4_state->src_h[channel], vc4_state->crtc_h);
519 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
520 	}
521 }
522 
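/*
 * Worked example for the load figures computed below: a 1920x1080
 * ARGB8888 plane at 60 Hz with no scaling has vscale_factor = 1, so
 * membus_load = 1920 * 1080 * 4 * 60 ~= 498 MB/s and
 * hvs_load = (1920 * 1080 * 60) >> 2 ~= 31.1 Mcycles/s.
 */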
523 static void vc4_plane_calc_load(struct drm_plane_state *state)
524 {
525 	unsigned int hvs_load_shift, vrefresh, i;
526 	struct drm_framebuffer *fb = state->fb;
527 	struct vc4_plane_state *vc4_state;
528 	struct drm_crtc_state *crtc_state;
529 	unsigned int vscale_factor;
530 	struct vc4_dev *vc4;
531 
532 	vc4 = to_vc4_dev(state->plane->dev);
533 	if (!vc4->load_tracker_available)
534 		return;
535 
536 	vc4_state = to_vc4_plane_state(state);
537 	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
538 							state->crtc);
539 	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
540 
541 	/* The HVS is able to process 2 pixels/cycle when scaling the source,
542 	 * 4 pixels/cycle otherwise.
543 	 * Alpha blending step seems to be pipelined and it's always operating
544 	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
545 	 * scaler block.
546 	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
547 	 */
548 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
549 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
550 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
551 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
552 		hvs_load_shift = 1;
553 	else
554 		hvs_load_shift = 2;
555 
556 	vc4_state->membus_load = 0;
557 	vc4_state->hvs_load = 0;
558 	for (i = 0; i < fb->format->num_planes; i++) {
559 		/* Even though the bandwidth/plane required for a single frame is
560 		 *
561 		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
562 		 *
563 		 * when downscaling, we have to read more pixels per line in
564 		 * the time frame reserved for a single line, so the bandwidth
565 		 * demand can be momentarily higher. To account for that, we
566 		 * calculate the down-scaling factor and multiply the plane
567 		 * load by this number. We're likely over-estimating the read
568 		 * demand, but that's better than under-estimating it.
569 		 */
570 		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
571 					     vc4_state->crtc_h);
572 		vc4_state->membus_load += vc4_state->src_w[i] *
573 					  vc4_state->src_h[i] * vscale_factor *
574 					  fb->format->cpp[i];
575 		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
576 	}
577 
578 	vc4_state->hvs_load *= vrefresh;
579 	vc4_state->hvs_load >>= hvs_load_shift;
580 	vc4_state->membus_load *= vrefresh;
581 }
582 
583 static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
584 {
585 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
586 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
587 	unsigned long irqflags;
588 	u32 lbm_size;
589 
590 	lbm_size = vc4_lbm_size(state);
591 	if (!lbm_size)
592 		return 0;
593 
594 	if (WARN_ON(!vc4_state->lbm_offset))
595 		return -EINVAL;
596 
597 	/* Allocate the LBM memory that the HVS will use for temporary
598 	 * storage due to our scaling/format conversion.
599 	 */
600 	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
601 		int ret;
602 
603 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
604 		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
605 						 &vc4_state->lbm,
606 						 lbm_size,
607 						 vc4->hvs->hvs5 ? 64 : 32,
608 						 0, 0);
609 		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
610 
611 		if (ret)
612 			return ret;
613 	} else {
614 		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
615 	}
616 
617 	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
618 
619 	return 0;
620 }
621 
622 /* Writes out a full display list for an active plane to the plane's
623  * private dlist state.
624  */
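/*
 * As an illustration of what gets emitted below: on HVS4, an unscaled,
 * linear RGB plane produces roughly this sequence of 32-bit dlist words:
 * control word (CTL0), position word 0 (screen position + fixed alpha),
 * position word 2 (source size + alpha mode), an HVS-written context word,
 * the framebuffer pointer, its context word, and the pitch word.  Scaled
 * or YUV planes additionally emit scaling parameters, colorspace
 * conversion words and a slot for the LBM base address.
 */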
625 static int vc4_plane_mode_set(struct drm_plane *plane,
626 			      struct drm_plane_state *state)
627 {
628 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
629 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
630 	struct drm_framebuffer *fb = state->fb;
631 	u32 ctl0_offset = vc4_state->dlist_count;
632 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
633 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
634 	int num_planes = fb->format->num_planes;
635 	u32 h_subsample = fb->format->hsub;
636 	u32 v_subsample = fb->format->vsub;
637 	bool mix_plane_alpha;
638 	bool covers_screen;
639 	u32 scl0, scl1, pitch0;
640 	u32 tiling, src_y;
641 	u32 hvs_format = format->hvs;
642 	unsigned int rotation;
643 	int ret, i;
644 
645 	if (vc4_state->dlist_initialized)
646 		return 0;
647 
648 	ret = vc4_plane_setup_clipping_and_scaling(state);
649 	if (ret)
650 		return ret;
651 
652 	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
653 	 * and 4:4:4, scl1 should be set to scl0 so both channels of
654 	 * the scaler do the same thing.  For YUV, the Y plane needs
655 	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
656 	 * the scl fields here.
657 	 */
658 	if (num_planes == 1) {
659 		scl0 = vc4_get_scl_field(state, 0);
660 		scl1 = scl0;
661 	} else {
662 		scl0 = vc4_get_scl_field(state, 1);
663 		scl1 = vc4_get_scl_field(state, 0);
664 	}
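	/* Note: the "plane" argument to vc4_get_scl_field() indexes the
	 * scaling state set up in vc4_plane_setup_clipping_and_scaling():
	 * slot 0 is Y (or RGB), slot 1 is the subsampled Cb/Cr pair, hence
	 * the swap above.
	 */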
665 
666 	rotation = drm_rotation_simplify(state->rotation,
667 					 DRM_MODE_ROTATE_0 |
668 					 DRM_MODE_REFLECT_X |
669 					 DRM_MODE_REFLECT_Y);
670 
671 	/* We must point to the last line when Y reflection is enabled. */
672 	src_y = vc4_state->src_y;
673 	if (rotation & DRM_MODE_REFLECT_Y)
674 		src_y += vc4_state->src_h[0] - 1;
675 
676 	switch (base_format_mod) {
677 	case DRM_FORMAT_MOD_LINEAR:
678 		tiling = SCALER_CTL0_TILING_LINEAR;
679 		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
680 
681 		/* Adjust the base pointer to the first pixel to be scanned
682 		 * out.
683 		 */
684 		for (i = 0; i < num_planes; i++) {
685 			vc4_state->offsets[i] += src_y /
686 						 (i ? v_subsample : 1) *
687 						 fb->pitches[i];
688 
689 			vc4_state->offsets[i] += vc4_state->src_x /
690 						 (i ? h_subsample : 1) *
691 						 fb->format->cpp[i];
692 		}
693 
694 		break;
695 
696 	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
697 		u32 tile_size_shift = 12; /* T tiles are 4 KiB */
698 		/* Whole-tile offsets, mostly for setting the pitch. */
699 		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
700 		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
701 		u32 tile_w_mask = (1 << tile_w_shift) - 1;
702 		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
703 		 * the height (in pixels) of a 4k tile.
704 		 */
705 		u32 tile_h_mask = (2 << tile_h_shift) - 1;
706 		/* For T-tiled, the FB pitch is "how many bytes from one row to
707 		 * the next, such that
708 		 *
709 		 *	pitch * tile_h == tile_size * tiles_per_row
710 		 */
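		/* Worked example: a 1920-wide 32bpp buffer has
		 * fb->pitches[0] = 7680, so tiles_w = 7680 >> (12 - 5) = 60,
		 * i.e. sixty 32x32-pixel 4 KiB tiles per row, and indeed
		 * 7680 * 32 == 4096 * 60.
		 */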
711 		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
712 		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
713 		u32 tiles_r = tiles_w - tiles_l;
714 		u32 tiles_t = src_y >> tile_h_shift;
715 		/* Intra-tile offsets, which modify the base address (the
716 		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
717 		 * base address).
718 		 */
719 		u32 tile_y = (src_y >> 4) & 1;
720 		u32 subtile_y = (src_y >> 2) & 3;
721 		u32 utile_y = src_y & 3;
722 		u32 x_off = vc4_state->src_x & tile_w_mask;
723 		u32 y_off = src_y & tile_h_mask;
724 
725 		/* When Y reflection is requested we must set the
726 		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
727 		 * after the initial one should be fetched in descending order,
728 		 * which makes sense since we start from the last line and go
729 		 * backward.
730 		 * Don't know why we need y_off = max_y_off - y_off, but it's
731 		 * definitely required (I guess it's also related to the "going
732 		 * backward" situation).
733 		 */
734 		if (rotation & DRM_MODE_REFLECT_Y) {
735 			y_off = tile_h_mask - y_off;
736 			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
737 		} else {
738 			pitch0 = 0;
739 		}
740 
741 		tiling = SCALER_CTL0_TILING_256B_OR_T;
742 		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
743 			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
744 			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
745 			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
746 		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
747 		vc4_state->offsets[0] += subtile_y << 8;
748 		vc4_state->offsets[0] += utile_y << 4;
749 
750 		/* Rows of tiles alternate left-to-right and right-to-left. */
751 		if (tiles_t & 1) {
752 			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
753 			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
754 						 tile_size_shift;
755 			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
756 		} else {
757 			vc4_state->offsets[0] += tiles_l << tile_size_shift;
758 			vc4_state->offsets[0] += tile_y << 10;
759 		}
760 
761 		break;
762 	}
763 
764 	case DRM_FORMAT_MOD_BROADCOM_SAND64:
765 	case DRM_FORMAT_MOD_BROADCOM_SAND128:
766 	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
767 		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
768 		u32 tile_w, tile, x_off, pix_per_tile;
769 
770 		hvs_format = HVS_PIXEL_FORMAT_H264;
771 
772 		switch (base_format_mod) {
773 		case DRM_FORMAT_MOD_BROADCOM_SAND64:
774 			tiling = SCALER_CTL0_TILING_64B;
775 			tile_w = 64;
776 			break;
777 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
778 			tiling = SCALER_CTL0_TILING_128B;
779 			tile_w = 128;
780 			break;
781 		case DRM_FORMAT_MOD_BROADCOM_SAND256:
782 			tiling = SCALER_CTL0_TILING_256B_OR_T;
783 			tile_w = 256;
784 			break;
785 		default:
786 			break;
787 		}
788 
789 		if (param > SCALER_TILE_HEIGHT_MASK) {
790 			DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
791 			return -EINVAL;
792 		}
793 
794 		pix_per_tile = tile_w / fb->format->cpp[0];
795 		tile = vc4_state->src_x / pix_per_tile;
796 		x_off = vc4_state->src_x % pix_per_tile;
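		/* Example with SAND128 NV12: tile_w = 128 and cpp = 1 for the
		 * luma plane, so pix_per_tile = 128; src_x = 300 then lands in
		 * tile (column) 2 with x_off = 44, and each tile advances the
		 * base address by param (the column height in lines) * tile_w
		 * bytes, matching the offsets[] adjustment below.
		 */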
797 
798 		/* Adjust the base pointer to the first pixel to be scanned
799 		 * out.
800 		 */
801 		for (i = 0; i < num_planes; i++) {
802 			vc4_state->offsets[i] += param * tile_w * tile;
803 			vc4_state->offsets[i] += src_y /
804 						 (i ? v_subsample : 1) *
805 						 tile_w;
806 			vc4_state->offsets[i] += x_off /
807 						 (i ? h_subsample : 1) *
808 						 fb->format->cpp[i];
809 		}
810 
811 		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
812 		break;
813 	}
814 
815 	default:
816 		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
817 			      (long long)fb->modifier);
818 		return -EINVAL;
819 	}
820 
821 	/* Don't waste cycles mixing with plane alpha if the set alpha
822 	 * is opaque or there is no per-pixel alpha information.
823 	 * In any case we use the alpha property value as the fixed alpha.
824 	 */
825 	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
826 			  fb->format->has_alpha;
827 
828 	if (!vc4->hvs->hvs5) {
829 		/* Control word */
830 		vc4_dlist_write(vc4_state,
831 				SCALER_CTL0_VALID |
832 				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
833 				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
834 				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
835 				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
836 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
837 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
838 				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
839 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
840 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
841 
842 		/* Position Word 0: Image Positions and Alpha Value */
843 		vc4_state->pos0_offset = vc4_state->dlist_count;
844 		vc4_dlist_write(vc4_state,
845 				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
846 				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
847 				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
848 
849 		/* Position Word 1: Scaled Image Dimensions. */
850 		if (!vc4_state->is_unity) {
851 			vc4_dlist_write(vc4_state,
852 					VC4_SET_FIELD(vc4_state->crtc_w,
853 						      SCALER_POS1_SCL_WIDTH) |
854 					VC4_SET_FIELD(vc4_state->crtc_h,
855 						      SCALER_POS1_SCL_HEIGHT));
856 		}
857 
858 		/* Position Word 2: Source Image Size, Alpha */
859 		vc4_state->pos2_offset = vc4_state->dlist_count;
860 		vc4_dlist_write(vc4_state,
861 				VC4_SET_FIELD(fb->format->has_alpha ?
862 					      SCALER_POS2_ALPHA_MODE_PIPELINE :
863 					      SCALER_POS2_ALPHA_MODE_FIXED,
864 					      SCALER_POS2_ALPHA_MODE) |
865 				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
866 				(fb->format->has_alpha ?
867 						SCALER_POS2_ALPHA_PREMULT : 0) |
868 				VC4_SET_FIELD(vc4_state->src_w[0],
869 					      SCALER_POS2_WIDTH) |
870 				VC4_SET_FIELD(vc4_state->src_h[0],
871 					      SCALER_POS2_HEIGHT));
872 
873 		/* Position Word 3: Context.  Written by the HVS. */
874 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
875 
876 	} else {
877 		u32 hvs_pixel_order = format->pixel_order;
878 
879 		if (format->pixel_order_hvs5)
880 			hvs_pixel_order = format->pixel_order_hvs5;
881 
882 		/* Control word */
883 		vc4_dlist_write(vc4_state,
884 				SCALER_CTL0_VALID |
885 				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
886 				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
887 				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
888 				(vc4_state->is_unity ?
889 						SCALER5_CTL0_UNITY : 0) |
890 				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
891 				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
892 				SCALER5_CTL0_ALPHA_EXPAND |
893 				SCALER5_CTL0_RGB_EXPAND);
894 
895 		/* Position Word 0: Image Positions and Alpha Value */
896 		vc4_state->pos0_offset = vc4_state->dlist_count;
897 		vc4_dlist_write(vc4_state,
898 				(rotation & DRM_MODE_REFLECT_Y ?
899 						SCALER5_POS0_VFLIP : 0) |
900 				VC4_SET_FIELD(vc4_state->crtc_x,
901 					      SCALER_POS0_START_X) |
902 				(rotation & DRM_MODE_REFLECT_X ?
903 					      SCALER5_POS0_HFLIP : 0) |
904 				VC4_SET_FIELD(vc4_state->crtc_y,
905 					      SCALER5_POS0_START_Y)
906 			       );
907 
908 		/* Control Word 2 */
909 		vc4_dlist_write(vc4_state,
910 				VC4_SET_FIELD(state->alpha >> 4,
911 					      SCALER5_CTL2_ALPHA) |
912 				(fb->format->has_alpha ?
913 					SCALER5_CTL2_ALPHA_PREMULT : 0) |
914 				(mix_plane_alpha ?
915 					SCALER5_CTL2_ALPHA_MIX : 0) |
916 				VC4_SET_FIELD(fb->format->has_alpha ?
917 				      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
918 				      SCALER5_CTL2_ALPHA_MODE_FIXED,
919 				      SCALER5_CTL2_ALPHA_MODE)
920 			       );
921 
922 		/* Position Word 1: Scaled Image Dimensions. */
923 		if (!vc4_state->is_unity) {
924 			vc4_dlist_write(vc4_state,
925 					VC4_SET_FIELD(vc4_state->crtc_w,
926 						      SCALER5_POS1_SCL_WIDTH) |
927 					VC4_SET_FIELD(vc4_state->crtc_h,
928 						      SCALER5_POS1_SCL_HEIGHT));
929 		}
930 
931 		/* Position Word 2: Source Image Size */
932 		vc4_state->pos2_offset = vc4_state->dlist_count;
933 		vc4_dlist_write(vc4_state,
934 				VC4_SET_FIELD(vc4_state->src_w[0],
935 					      SCALER5_POS2_WIDTH) |
936 				VC4_SET_FIELD(vc4_state->src_h[0],
937 					      SCALER5_POS2_HEIGHT));
938 
939 		/* Position Word 3: Context.  Written by the HVS. */
940 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
941 	}
942 
943 
944 	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
945 	 *
946 	 * The pointers may be any byte address.
947 	 */
948 	vc4_state->ptr0_offset = vc4_state->dlist_count;
949 	for (i = 0; i < num_planes; i++)
950 		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
951 
952 	/* Pointer Context Word 0/1/2: Written by the HVS */
953 	for (i = 0; i < num_planes; i++)
954 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
955 
956 	/* Pitch word 0 */
957 	vc4_dlist_write(vc4_state, pitch0);
958 
959 	/* Pitch word 1/2 */
960 	for (i = 1; i < num_planes; i++) {
961 		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
962 			vc4_dlist_write(vc4_state,
963 					VC4_SET_FIELD(fb->pitches[i],
964 						      SCALER_SRC_PITCH));
965 		} else {
966 			vc4_dlist_write(vc4_state, pitch0);
967 		}
968 	}
969 
970 	/* Colorspace conversion words */
971 	if (vc4_state->is_yuv) {
972 		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
973 		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
974 		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
975 	}
976 
977 	vc4_state->lbm_offset = 0;
978 
979 	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
980 	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
981 	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
982 	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
983 		/* Reserve a slot for the LBM Base Address. The real value will
984 		 * be set when calling vc4_plane_allocate_lbm().
985 		 */
986 		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
987 		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
988 			vc4_state->lbm_offset = vc4_state->dlist_count;
989 			vc4_dlist_counter_increment(vc4_state);
990 		}
991 
992 		if (num_planes > 1) {
993 			/* Emit Cb/Cr as channel 0 and Y as channel
994 			 * 1. This matches how we set up scl0/scl1
995 			 * above.
996 			 */
997 			vc4_write_scaling_parameters(state, 1);
998 		}
999 		vc4_write_scaling_parameters(state, 0);
1000 
1001 		/* If any PPF setup was done, then all the kernel
1002 		 * pointers get uploaded.
1003 		 */
1004 		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
1005 		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
1006 		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
1007 		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
1008 			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
1009 						   SCALER_PPF_KERNEL_OFFSET);
1010 
1011 			/* HPPF plane 0 */
1012 			vc4_dlist_write(vc4_state, kernel);
1013 			/* VPPF plane 0 */
1014 			vc4_dlist_write(vc4_state, kernel);
1015 			/* HPPF plane 1 */
1016 			vc4_dlist_write(vc4_state, kernel);
1017 			/* VPPF plane 1 */
1018 			vc4_dlist_write(vc4_state, kernel);
1019 		}
1020 	}
1021 
1022 	vc4_state->dlist[ctl0_offset] |=
1023 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
1024 
1025 	/* crtc_* are already clipped coordinates. */
1026 	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
1027 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
1028 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
1029 	/* Background fill might be necessary when the plane could blend from
1030 	 * the background (per-pixel alpha content or a non-opaque plane alpha)
1031 	 * or when it does not cover the entire screen.
1032 	 */
1033 	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
1034 				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
1035 
1036 	/* Flag the dlist as initialized to avoid checking it twice in case
1037 	 * the async update check already called vc4_plane_mode_set() and
1038 	 * decided to fall back to sync update because async update was not
1039 	 * possible.
1040 	 */
1041 	vc4_state->dlist_initialized = 1;
1042 
1043 	vc4_plane_calc_load(state);
1044 
1045 	return 0;
1046 }
1047 
1048 /* If a modeset involves changing the setup of a plane, the atomic
1049  * infrastructure will call this to validate a proposed plane setup.
1050  * However, if a plane isn't getting updated, this (and the
1051  * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
1052  * compute the dlist here and have all active plane dlists get updated
1053  * in the CRTC's flush.
1054  */
1055 static int vc4_plane_atomic_check(struct drm_plane *plane,
1056 				  struct drm_atomic_state *state)
1057 {
1058 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
1059 										 plane);
1060 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
1061 	int ret;
1062 
1063 	vc4_state->dlist_count = 0;
1064 
1065 	if (!plane_enabled(new_plane_state))
1066 		return 0;
1067 
1068 	ret = vc4_plane_mode_set(plane, new_plane_state);
1069 	if (ret)
1070 		return ret;
1071 
1072 	return vc4_plane_allocate_lbm(new_plane_state);
1073 }
1074 
1075 static void vc4_plane_atomic_update(struct drm_plane *plane,
1076 				    struct drm_atomic_state *state)
1077 {
1078 	/* No contents here.  Since we don't know where in the CRTC's
1079 	 * dlist we should be stored, our dlist is uploaded to the
1080 	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
1081 	 * time.
1082 	 */
1083 }
1084 
1085 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
1086 {
1087 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1088 	int i;
1089 
1090 	vc4_state->hw_dlist = dlist;
1091 
1092 	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
1093 	for (i = 0; i < vc4_state->dlist_count; i++)
1094 		writel(vc4_state->dlist[i], &dlist[i]);
1095 
1096 	return vc4_state->dlist_count;
1097 }
1098 
1099 u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
1100 {
1101 	const struct vc4_plane_state *vc4_state =
1102 		container_of(state, typeof(*vc4_state), base);
1103 
1104 	return vc4_state->dlist_count;
1105 }
1106 
1107 /* Updates the plane to immediately (well, once the FIFO needs
1108  * refilling) scan out from a new framebuffer.
1109  */
1110 void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
1111 {
1112 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1113 	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
1114 	uint32_t addr;
1115 
1116 	/* We're skipping the address adjustment for negative origin,
1117 	 * because this is only called on the primary plane.
1118 	 */
1119 	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
1120 	addr = bo->paddr + fb->offsets[0];
1121 
1122 	/* Write the new address into the hardware immediately.  The
1123 	 * scanout will start from this address as soon as the FIFO
1124 	 * needs to refill with pixels.
1125 	 */
1126 	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1127 
1128 	/* Also update the CPU-side dlist copy, so that any later
1129 	 * atomic updates that don't do a new modeset on our plane
1130 	 * also use our updated address.
1131 	 */
1132 	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
1133 }
1134 
1135 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
1136 					  struct drm_atomic_state *state)
1137 {
1138 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
1139 										 plane);
1140 	struct vc4_plane_state *vc4_state, *new_vc4_state;
1141 
1142 	swap(plane->state->fb, new_plane_state->fb);
1143 	plane->state->crtc_x = new_plane_state->crtc_x;
1144 	plane->state->crtc_y = new_plane_state->crtc_y;
1145 	plane->state->crtc_w = new_plane_state->crtc_w;
1146 	plane->state->crtc_h = new_plane_state->crtc_h;
1147 	plane->state->src_x = new_plane_state->src_x;
1148 	plane->state->src_y = new_plane_state->src_y;
1149 	plane->state->src_w = new_plane_state->src_w;
1150 	plane->state->src_h = new_plane_state->src_h;
1151 	plane->state->alpha = new_plane_state->alpha;
1152 	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
1153 	plane->state->rotation = new_plane_state->rotation;
1154 	plane->state->zpos = new_plane_state->zpos;
1155 	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
1156 	plane->state->color_encoding = new_plane_state->color_encoding;
1157 	plane->state->color_range = new_plane_state->color_range;
1158 	plane->state->src = new_plane_state->src;
1159 	plane->state->dst = new_plane_state->dst;
1160 	plane->state->visible = new_plane_state->visible;
1161 
1162 	new_vc4_state = to_vc4_plane_state(new_plane_state);
1163 	vc4_state = to_vc4_plane_state(plane->state);
1164 
1165 	vc4_state->crtc_x = new_vc4_state->crtc_x;
1166 	vc4_state->crtc_y = new_vc4_state->crtc_y;
1167 	vc4_state->crtc_h = new_vc4_state->crtc_h;
1168 	vc4_state->crtc_w = new_vc4_state->crtc_w;
1169 	vc4_state->src_x = new_vc4_state->src_x;
1170 	vc4_state->src_y = new_vc4_state->src_y;
1171 	memcpy(vc4_state->src_w, new_vc4_state->src_w,
1172 	       sizeof(vc4_state->src_w));
1173 	memcpy(vc4_state->src_h, new_vc4_state->src_h,
1174 	       sizeof(vc4_state->src_h));
1175 	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
1176 	       sizeof(vc4_state->x_scaling));
1177 	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
1178 	       sizeof(vc4_state->y_scaling));
1179 	vc4_state->is_unity = new_vc4_state->is_unity;
1180 	vc4_state->is_yuv = new_vc4_state->is_yuv;
1181 	memcpy(vc4_state->offsets, new_vc4_state->offsets,
1182 	       sizeof(vc4_state->offsets));
1183 	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;
1184 
1185 	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
1186 	vc4_state->dlist[vc4_state->pos0_offset] =
1187 		new_vc4_state->dlist[vc4_state->pos0_offset];
1188 	vc4_state->dlist[vc4_state->pos2_offset] =
1189 		new_vc4_state->dlist[vc4_state->pos2_offset];
1190 	vc4_state->dlist[vc4_state->ptr0_offset] =
1191 		new_vc4_state->dlist[vc4_state->ptr0_offset];
1192 
1193 	/* Note that we can't just call vc4_plane_write_dlist()
1194 	 * because that would smash the context data that the HVS is
1195 	 * currently using.
1196 	 */
1197 	writel(vc4_state->dlist[vc4_state->pos0_offset],
1198 	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
1199 	writel(vc4_state->dlist[vc4_state->pos2_offset],
1200 	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
1201 	writel(vc4_state->dlist[vc4_state->ptr0_offset],
1202 	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1203 }
1204 
1205 static int vc4_plane_atomic_async_check(struct drm_plane *plane,
1206 					struct drm_atomic_state *state)
1207 {
1208 	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
1209 										 plane);
1210 	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
1211 	int ret;
1212 	u32 i;
1213 
1214 	ret = vc4_plane_mode_set(plane, new_plane_state);
1215 	if (ret)
1216 		return ret;
1217 
1218 	old_vc4_state = to_vc4_plane_state(plane->state);
1219 	new_vc4_state = to_vc4_plane_state(new_plane_state);
1220 	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
1221 	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
1222 	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
1223 	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
1224 	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
1225 		return -EINVAL;
1226 
1227 	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update;
1228 	 * if anything else has changed, fall back to a sync update.
1229 	 */
1230 	for (i = 0; i < new_vc4_state->dlist_count; i++) {
1231 		if (i == new_vc4_state->pos0_offset ||
1232 		    i == new_vc4_state->pos2_offset ||
1233 		    i == new_vc4_state->ptr0_offset ||
1234 		    (new_vc4_state->lbm_offset &&
1235 		     i == new_vc4_state->lbm_offset))
1236 			continue;
1237 
1238 		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
1239 			return -EINVAL;
1240 	}
1241 
1242 	return 0;
1243 }
1244 
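/*
 * prepare_fb/cleanup_fb bracket the time a framebuffer is in use by the
 * plane with vc4_bo_inc_usecnt()/vc4_bo_dec_usecnt(); both bail out early
 * when the plane already points at the same framebuffer, so the usecnt
 * reference is only taken once per distinct FB.
 */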
1245 static int vc4_prepare_fb(struct drm_plane *plane,
1246 			  struct drm_plane_state *state)
1247 {
1248 	struct vc4_bo *bo;
1249 	int ret;
1250 
1251 	if (!state->fb)
1252 		return 0;
1253 
1254 	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
1255 
1256 	drm_gem_plane_helper_prepare_fb(plane, state);
1257 
1258 	if (plane->state->fb == state->fb)
1259 		return 0;
1260 
1261 	ret = vc4_bo_inc_usecnt(bo);
1262 	if (ret)
1263 		return ret;
1264 
1265 	return 0;
1266 }
1267 
1268 static void vc4_cleanup_fb(struct drm_plane *plane,
1269 			   struct drm_plane_state *state)
1270 {
1271 	struct vc4_bo *bo;
1272 
1273 	if (plane->state->fb == state->fb || !state->fb)
1274 		return;
1275 
1276 	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
1277 	vc4_bo_dec_usecnt(bo);
1278 }
1279 
1280 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
1281 	.atomic_check = vc4_plane_atomic_check,
1282 	.atomic_update = vc4_plane_atomic_update,
1283 	.prepare_fb = vc4_prepare_fb,
1284 	.cleanup_fb = vc4_cleanup_fb,
1285 	.atomic_async_check = vc4_plane_atomic_async_check,
1286 	.atomic_async_update = vc4_plane_atomic_async_update,
1287 };
1288 
1289 static bool vc4_format_mod_supported(struct drm_plane *plane,
1290 				     uint32_t format,
1291 				     uint64_t modifier)
1292 {
1293 	/* Support T_TILING for RGB formats only. */
1294 	switch (format) {
1295 	case DRM_FORMAT_XRGB8888:
1296 	case DRM_FORMAT_ARGB8888:
1297 	case DRM_FORMAT_ABGR8888:
1298 	case DRM_FORMAT_XBGR8888:
1299 	case DRM_FORMAT_RGB565:
1300 	case DRM_FORMAT_BGR565:
1301 	case DRM_FORMAT_ARGB1555:
1302 	case DRM_FORMAT_XRGB1555:
1303 		switch (fourcc_mod_broadcom_mod(modifier)) {
1304 		case DRM_FORMAT_MOD_LINEAR:
1305 		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
1306 			return true;
1307 		default:
1308 			return false;
1309 		}
1310 	case DRM_FORMAT_NV12:
1311 	case DRM_FORMAT_NV21:
1312 		switch (fourcc_mod_broadcom_mod(modifier)) {
1313 		case DRM_FORMAT_MOD_LINEAR:
1314 		case DRM_FORMAT_MOD_BROADCOM_SAND64:
1315 		case DRM_FORMAT_MOD_BROADCOM_SAND128:
1316 		case DRM_FORMAT_MOD_BROADCOM_SAND256:
1317 			return true;
1318 		default:
1319 			return false;
1320 		}
1321 	case DRM_FORMAT_RGBX1010102:
1322 	case DRM_FORMAT_BGRX1010102:
1323 	case DRM_FORMAT_RGBA1010102:
1324 	case DRM_FORMAT_BGRA1010102:
1325 	case DRM_FORMAT_YUV422:
1326 	case DRM_FORMAT_YVU422:
1327 	case DRM_FORMAT_YUV420:
1328 	case DRM_FORMAT_YVU420:
1329 	case DRM_FORMAT_NV16:
1330 	case DRM_FORMAT_NV61:
1331 	default:
1332 		return (modifier == DRM_FORMAT_MOD_LINEAR);
1333 	}
1334 }
1335 
1336 static const struct drm_plane_funcs vc4_plane_funcs = {
1337 	.update_plane = drm_atomic_helper_update_plane,
1338 	.disable_plane = drm_atomic_helper_disable_plane,
1339 	.destroy = drm_plane_cleanup,
1340 	.set_property = NULL,
1341 	.reset = vc4_plane_reset,
1342 	.atomic_duplicate_state = vc4_plane_duplicate_state,
1343 	.atomic_destroy_state = vc4_plane_destroy_state,
1344 	.format_mod_supported = vc4_format_mod_supported,
1345 };
1346 
1347 struct drm_plane *vc4_plane_init(struct drm_device *dev,
1348 				 enum drm_plane_type type)
1349 {
1350 	struct drm_plane *plane = NULL;
1351 	struct vc4_plane *vc4_plane;
1352 	u32 formats[ARRAY_SIZE(hvs_formats)];
1353 	int ret = 0;
1354 	unsigned i;
1355 	static const uint64_t modifiers[] = {
1356 		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
1357 		DRM_FORMAT_MOD_BROADCOM_SAND128,
1358 		DRM_FORMAT_MOD_BROADCOM_SAND64,
1359 		DRM_FORMAT_MOD_BROADCOM_SAND256,
1360 		DRM_FORMAT_MOD_LINEAR,
1361 		DRM_FORMAT_MOD_INVALID
1362 	};
1363 
1364 	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
1365 				 GFP_KERNEL);
1366 	if (!vc4_plane)
1367 		return ERR_PTR(-ENOMEM);
1368 
1369 	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
1370 		formats[i] = hvs_formats[i].drm;
1371 
1372 	plane = &vc4_plane->base;
1373 	ret = drm_universal_plane_init(dev, plane, 0,
1374 				       &vc4_plane_funcs,
1375 				       formats, ARRAY_SIZE(formats),
1376 				       modifiers, type, NULL);
1377 	if (ret)
1378 		return ERR_PTR(ret);
1379 
1380 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
1381 
1382 	drm_plane_create_alpha_property(plane);
1383 	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
1384 					   DRM_MODE_ROTATE_0 |
1385 					   DRM_MODE_ROTATE_180 |
1386 					   DRM_MODE_REFLECT_X |
1387 					   DRM_MODE_REFLECT_Y);
1388 
1389 	return plane;
1390 }
1391 
1392 int vc4_plane_create_additional_planes(struct drm_device *drm)
1393 {
1394 	struct drm_plane *cursor_plane;
1395 	struct drm_crtc *crtc;
1396 	unsigned int i;
1397 
1398 	/* Set up some arbitrary number of planes.  We're not limited
1399 	 * by a set number of physical registers, just the space in
1400 	 * the HVS (16k) and how small a plane can be (28 bytes).
1401 	 * However, each plane we set up takes up some memory, and
1402 	 * increases the cost of looping over planes, which atomic
1403 	 * modesetting does quite a bit.  As a result, we pick a
1404 	 * modest number of planes to expose, that should hopefully
1405 	 * still cover any sane usecase.
1406 	 */
1407 	for (i = 0; i < 16; i++) {
1408 		struct drm_plane *plane =
1409 			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
1410 
1411 		if (IS_ERR(plane))
1412 			continue;
1413 
1414 		plane->possible_crtcs =
1415 			GENMASK(drm->mode_config.num_crtc - 1, 0);
1416 	}
1417 
1418 	drm_for_each_crtc(crtc, drm) {
1419 		/* Set up the legacy cursor after overlay initialization,
1420 		 * since we overlay planes on the CRTC in the order they were
1421 		 * initialized.
1422 		 */
1423 		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
1424 		if (!IS_ERR(cursor_plane)) {
1425 			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
1426 			crtc->cursor = cursor_plane;
1427 		}
1428 	}
1429 
1430 	return 0;
1431 }
1432