1 /* Copyright 2022 Advanced Micro Devices, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 * OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors: AMD
22 *
23 */
24
25 #include <string.h>
26 #include "common.h"
27 #include "vpe_priv.h"
28 #include "vpe10_dpp.h"
29 #include "color.h"
30 #include "vpe10/inc/vpe10_cm_common.h"
31 #include "hw_shared.h"
32 #include "reg_helper.h"
33
34 #define CTX_BASE dpp
35 #define CTX vpe10_dpp
36
37 static struct dpp_funcs vpe10_dpp_funcs = {
38
39 // cnv
40 .program_cnv = vpe10_dpp_program_cnv,
41 .program_pre_dgam = vpe10_dpp_cnv_program_pre_dgam,
42 .program_cnv_bias_scale = vpe10_dpp_program_cnv_bias_scale,
43 .program_alpha_keyer = vpe10_dpp_cnv_program_alpha_keyer,
44 .program_crc = vpe10_dpp_program_crc,
45
46 // cm
47 .program_input_transfer_func = vpe10_dpp_program_input_transfer_func,
48 .program_gamut_remap = vpe10_dpp_program_gamut_remap,
49 .program_post_csc = vpe10_dpp_program_post_csc,
50 .set_hdr_multiplier = vpe10_dpp_set_hdr_multiplier,
51
52 // scaler
53 .get_optimal_number_of_taps = vpe10_dpp_get_optimal_number_of_taps,
54 .dscl_calc_lb_num_partitions = vpe10_dscl_calc_lb_num_partitions,
55 .set_segment_scaler = vpe10_dpp_set_segment_scaler,
56 .set_frame_scaler = vpe10_dpp_set_frame_scaler,
57 .get_line_buffer_size = vpe10_get_line_buffer_size,
58 .validate_number_of_taps = vpe10_dpp_validate_number_of_taps,
59 };
60
vpe10_construct_dpp(struct vpe_priv * vpe_priv,struct dpp * dpp)61 void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
62 {
63 dpp->vpe_priv = vpe_priv;
64 dpp->funcs = &vpe10_dpp_funcs;
65 }
66
vpe10_dpp_get_optimal_number_of_taps(struct dpp * dpp,struct scaler_data * scl_data,const struct vpe_scaling_taps * in_taps)67 bool vpe10_dpp_get_optimal_number_of_taps(
68 struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps)
69 {
70 struct vpe_priv *vpe_priv = dpp->vpe_priv;
71 uint32_t h_taps_min = 0, v_taps_min = 0;
72 /*
73 * Set default taps if none are provided
74 * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
75 * taps = 4 for upscaling
76 */
77 if (in_taps->h_taps > 8 || in_taps->v_taps > 8 || in_taps->h_taps_c > 8 ||
78 in_taps->v_taps_c > 8)
79 return false;
80
81 if (vpe_fixpt_ceil(scl_data->ratios.horz) > 1)
82 h_taps_min = (uint32_t)max(4, min(2 * vpe_fixpt_ceil(scl_data->ratios.horz), 8));
83 else
84 h_taps_min = (uint32_t)4;
85
86 if (in_taps->h_taps == 0) {
87 scl_data->taps.h_taps = h_taps_min;
88 } else {
89 if (in_taps->h_taps < h_taps_min)
90 return false;
91
92 scl_data->taps.h_taps = in_taps->h_taps;
93 }
94
95 if (vpe_fixpt_ceil(scl_data->ratios.vert) > 1)
96 v_taps_min =
97 (uint32_t)max(4, min(vpe_fixpt_ceil(vpe_fixpt_mul_int(scl_data->ratios.vert, 2)), 8));
98 else
99 v_taps_min = (uint32_t)4;
100
101 if (in_taps->v_taps == 0) {
102 scl_data->taps.v_taps = v_taps_min;
103 } else {
104 if (in_taps->v_taps < v_taps_min)
105 return false;
106
107 scl_data->taps.v_taps = in_taps->v_taps;
108 }
109
110 if (in_taps->h_taps_c == 0) {
111 // default to 2 as mmd only uses bilinear for chroma
112 scl_data->taps.h_taps_c = (uint32_t)2;
113 } else
114 scl_data->taps.h_taps_c = in_taps->h_taps_c;
115
116 if (in_taps->v_taps_c == 0) {
117 // default to 2 as mmd only uses bilinear for chroma
118 scl_data->taps.v_taps_c = (uint32_t)2;
119 } else
120 scl_data->taps.v_taps_c = in_taps->v_taps_c;
121
122 /* taps can be either 1 or an even number */
123 if (scl_data->taps.h_taps % 2 && scl_data->taps.h_taps != 1)
124 scl_data->taps.h_taps++;
125
126 if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
127 scl_data->taps.v_taps++;
128
129 if (scl_data->taps.h_taps_c % 2 && scl_data->taps.h_taps_c != 1)
130 scl_data->taps.h_taps_c++;
131
132 if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
133 scl_data->taps.v_taps_c++;
134
135 // bypass scaler if all ratios are 1
136 if (IDENTITY_RATIO(scl_data->ratios.horz))
137 scl_data->taps.h_taps = 1;
138 if (IDENTITY_RATIO(scl_data->ratios.vert))
139 scl_data->taps.v_taps = 1;
140
141 return true;
142 }
143
vpe10_dscl_calc_lb_num_partitions(const struct scaler_data * scl_data,enum lb_memory_config lb_config,uint32_t * num_part_y,uint32_t * num_part_c)144 void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
145 enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c)
146 {
147 uint32_t memory_line_size_y, memory_line_size_c, memory_line_size_a, lb_memory_size,
148 lb_memory_size_c, lb_memory_size_a, num_partitions_a;
149
150 uint32_t line_size = scl_data->viewport.width < scl_data->recout.width
151 ? scl_data->viewport.width
152 : scl_data->recout.width;
153 uint32_t line_size_c = scl_data->viewport_c.width < scl_data->recout.width
154 ? scl_data->viewport_c.width
155 : scl_data->recout.width;
156
157 if (line_size == 0)
158 line_size = 1;
159
160 if (line_size_c == 0)
161 line_size_c = 1;
162
163 memory_line_size_y = (line_size + 5) / 6; /* +5 to ceil */
164 memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */
165 memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */
166
167 // only has 1-piece lb config in vpe1
168 lb_memory_size = 696;
169 lb_memory_size_c = 696;
170 lb_memory_size_a = 696;
171
172 *num_part_y = lb_memory_size / memory_line_size_y;
173 *num_part_c = lb_memory_size_c / memory_line_size_c;
174 num_partitions_a = lb_memory_size_a / memory_line_size_a;
175
176 if (scl_data->lb_params.alpha_en && (num_partitions_a < *num_part_y))
177 *num_part_y = num_partitions_a;
178
179 if (*num_part_y > 12)
180 *num_part_y = 12;
181 if (*num_part_c > 12)
182 *num_part_c = 12;
183 }
184
185 /* Not used as we don't enable prealpha dealpha currently
186 * Can skip for optimize performance and use default val
187 */
vpe10_dpp_program_prealpha_dealpha(struct dpp * dpp)188 static void vpe10_dpp_program_prealpha_dealpha(struct dpp *dpp)
189 {
190 uint32_t dealpha_en = 0, dealpha_ablnd_en = 0;
191 uint32_t realpha_en = 0, realpha_ablnd_en = 0;
192 uint32_t program_prealpha_dealpha = 0;
193 PROGRAM_ENTRY();
194
195 if (program_prealpha_dealpha) {
196 dealpha_en = 1;
197 realpha_en = 1;
198 }
199 REG_SET_2(
200 VPCNVC_PRE_DEALPHA, 0, PRE_DEALPHA_EN, dealpha_en, PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en);
201 REG_SET_2(
202 VPCNVC_PRE_REALPHA, 0, PRE_REALPHA_EN, realpha_en, PRE_REALPHA_ABLND_EN, realpha_ablnd_en);
203 }
204
205 /* Not used as we don't have special 2bit LUt currently
206 * Can skip for optimize performance and use default val
207 */
vpe10_dpp_program_alpha_2bit_lut(struct dpp * dpp,struct cnv_alpha_2bit_lut * alpha_2bit_lut)208 static void vpe10_dpp_program_alpha_2bit_lut(
209 struct dpp *dpp, struct cnv_alpha_2bit_lut *alpha_2bit_lut)
210 {
211 PROGRAM_ENTRY();
212
213 if (alpha_2bit_lut != NULL) {
214 REG_SET_4(VPCNVC_ALPHA_2BIT_LUT, 0, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0, ALPHA_2BIT_LUT1,
215 alpha_2bit_lut->lut1, ALPHA_2BIT_LUT2, alpha_2bit_lut->lut2, ALPHA_2BIT_LUT3,
216 alpha_2bit_lut->lut3);
217 } else { // restore to default
218 REG_SET_DEFAULT(VPCNVC_ALPHA_2BIT_LUT);
219 }
220 }
221
vpe10_dpp_program_cnv(struct dpp * dpp,enum vpe_surface_pixel_format format,enum vpe_expansion_mode mode)222 void vpe10_dpp_program_cnv(
223 struct dpp *dpp, enum vpe_surface_pixel_format format, enum vpe_expansion_mode mode)
224 {
225 uint32_t alpha_en = 1;
226 uint32_t pixel_format = 0;
227 uint32_t hw_expansion_mode = 0;
228
229 PROGRAM_ENTRY();
230
231 switch (mode) {
232 case VPE_EXPANSION_MODE_DYNAMIC:
233 hw_expansion_mode = 0;
234 break;
235 case VPE_EXPANSION_MODE_ZERO:
236 hw_expansion_mode = 1;
237 break;
238 default:
239 VPE_ASSERT(0);
240 break;
241 }
242
243 switch (format) {
244 case VPE_SURFACE_PIXEL_FORMAT_GRPH_XRGB8888:
245 case VPE_SURFACE_PIXEL_FORMAT_GRPH_XBGR8888:
246 alpha_en = 0;
247 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
248 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
249 pixel_format = 8;
250 break;
251 case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBX8888:
252 case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRX8888:
253 alpha_en = 0;
254 case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA8888:
255 case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA8888:
256 pixel_format = 9;
257 break;
258 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
259 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
260 pixel_format = 10;
261 break;
262 case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA1010102:
263 case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA1010102:
264 pixel_format = 11;
265 break;
266 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
267 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCbCr8888:
268 pixel_format = 12;
269 break;
270 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
271 pixel_format = 64;
272 alpha_en = 0;
273 break;
274 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
275 pixel_format = 65;
276 alpha_en = 0;
277 break;
278 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
279 pixel_format = 66;
280 alpha_en = 0;
281 break;
282 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
283 pixel_format = 67;
284 alpha_en = 0;
285 break;
286 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
287 pixel_format = 22;
288 break;
289 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
290 case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
291 pixel_format = 24;
292 break;
293 case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA16161616F:
294 case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA16161616F:
295 pixel_format = 25;
296 break;
297 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
298 pixel_format = 114;
299 break;
300 case VPE_SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
301 pixel_format = 115;
302 break;
303 default:
304 break;
305 }
306
307 REG_SET(VPCNVC_SURFACE_PIXEL_FORMAT, 0, VPCNVC_SURFACE_PIXEL_FORMAT, pixel_format);
308
309 REG_SET_7(VPCNVC_FORMAT_CONTROL, 0, FORMAT_EXPANSION_MODE, hw_expansion_mode, FORMAT_CNV16, 0,
310 FORMAT_CONTROL__ALPHA_EN, alpha_en, VPCNVC_BYPASS, dpp->vpe_priv->init.debug.vpcnvc_bypass,
311 VPCNVC_BYPASS_MSB_ALIGN, 0, CLAMP_POSITIVE, 0, CLAMP_POSITIVE_C, 0);
312 }
313
vpe10_dpp_program_cnv_bias_scale(struct dpp * dpp,struct bias_and_scale * bias_and_scale)314 void vpe10_dpp_program_cnv_bias_scale(struct dpp *dpp, struct bias_and_scale *bias_and_scale)
315 {
316 PROGRAM_ENTRY();
317
318 REG_SET(VPCNVC_FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, bias_and_scale->bias_red);
319 REG_SET(VPCNVC_FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, bias_and_scale->bias_green);
320 REG_SET(VPCNVC_FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, bias_and_scale->bias_blue);
321
322 REG_SET(VPCNVC_FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, bias_and_scale->scale_red);
323 REG_SET(VPCNVC_FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, bias_and_scale->scale_green);
324 REG_SET(VPCNVC_FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, bias_and_scale->scale_blue);
325 }
326
vpe10_dpp_cnv_program_pre_dgam(struct dpp * dpp,enum color_transfer_func tr)327 void vpe10_dpp_cnv_program_pre_dgam(struct dpp *dpp, enum color_transfer_func tr)
328 {
329 int pre_degam_en = 1;
330 int degamma_lut_selection = 0;
331
332 PROGRAM_ENTRY();
333
334 switch (tr) {
335 case TRANSFER_FUNC_LINEAR_0_125:
336 pre_degam_en = 0; // bypass
337 break;
338 case TRANSFER_FUNC_SRGB:
339 degamma_lut_selection = 0;
340 break;
341 case TRANSFER_FUNC_BT709:
342 degamma_lut_selection = 4;
343 break;
344 case TRANSFER_FUNC_PQ2084:
345 degamma_lut_selection = 5;
346 break;
347 default:
348 pre_degam_en = 0;
349 break;
350 }
351
352 REG_SET_2(
353 VPCNVC_PRE_DEGAM, 0, PRE_DEGAM_MODE, pre_degam_en, PRE_DEGAM_SELECT, degamma_lut_selection);
354 }
355
vpe10_dpp_cnv_program_alpha_keyer(struct dpp * dpp,struct cnv_color_keyer_params * color_keyer)356 void vpe10_dpp_cnv_program_alpha_keyer(struct dpp *dpp, struct cnv_color_keyer_params *color_keyer)
357 {
358 PROGRAM_ENTRY();
359
360 REG_SET_2(VPCNVC_COLOR_KEYER_CONTROL, 0, COLOR_KEYER_EN, color_keyer->color_keyer_en,
361 COLOR_KEYER_MODE, color_keyer->color_keyer_mode);
362
363 REG_SET_2(VPCNVC_COLOR_KEYER_ALPHA, 0, COLOR_KEYER_ALPHA_LOW,
364 color_keyer->color_keyer_alpha_low, COLOR_KEYER_ALPHA_HIGH,
365 color_keyer->color_keyer_alpha_high);
366
367 REG_SET_2(VPCNVC_COLOR_KEYER_RED, 0, COLOR_KEYER_RED_LOW, color_keyer->color_keyer_red_low,
368 COLOR_KEYER_RED_HIGH, color_keyer->color_keyer_red_high);
369
370 REG_SET_2(VPCNVC_COLOR_KEYER_GREEN, 0, COLOR_KEYER_GREEN_LOW,
371 color_keyer->color_keyer_green_low, COLOR_KEYER_GREEN_HIGH,
372 color_keyer->color_keyer_green_high);
373
374 REG_SET_2(VPCNVC_COLOR_KEYER_BLUE, 0, COLOR_KEYER_BLUE_LOW, color_keyer->color_keyer_blue_low,
375 COLOR_KEYER_BLUE_HIGH, color_keyer->color_keyer_blue_high);
376 }
377
vpe10_get_line_buffer_size()378 uint32_t vpe10_get_line_buffer_size()
379 {
380 return MAX_LINE_SIZE * MAX_LINE_CNT;
381 }
382
vpe10_dpp_validate_number_of_taps(struct dpp * dpp,struct scaler_data * scl_data)383 bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, struct scaler_data *scl_data)
384 {
385 uint32_t num_part_y, num_part_c;
386 uint32_t max_taps_y, max_taps_c;
387 uint32_t min_taps_y, min_taps_c;
388
389 /*Ensure we can support the requested number of vtaps*/
390 min_taps_y = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert);
391 min_taps_c = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c);
392
393 dpp->funcs->dscl_calc_lb_num_partitions(scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c);
394
395 /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */
396 if (vpe_fixpt_ceil(scl_data->ratios.vert) > 2)
397 max_taps_y = num_part_y - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert) - 2);
398 else
399 max_taps_y = num_part_y;
400
401 if (vpe_fixpt_ceil(scl_data->ratios.vert_c) > 2)
402 max_taps_c = num_part_c - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c) - 2);
403 else
404 max_taps_c = num_part_c;
405
406 if (max_taps_y < min_taps_y)
407 return false;
408 else if (max_taps_c < min_taps_c)
409 return false;
410
411 if (scl_data->taps.v_taps > max_taps_y)
412 scl_data->taps.v_taps = max_taps_y;
413
414 if (scl_data->taps.v_taps_c > max_taps_c)
415 scl_data->taps.v_taps_c = max_taps_c;
416
417 if (IDENTITY_RATIO(scl_data->ratios.vert))
418 scl_data->taps.v_taps = 1;
419
420 if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
421 scl_data->taps.v_taps++;
422
423 if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
424 scl_data->taps.v_taps_c++;
425
426 return true;
427 }
428
vpe10_dpp_program_crc(struct dpp * dpp,bool enable)429 void vpe10_dpp_program_crc(struct dpp *dpp, bool enable)
430 {
431 PROGRAM_ENTRY();
432 REG_UPDATE(VPDPP_CRC_CTRL, VPDPP_CRC_EN, enable);
433 }
434
435