• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2022 Advanced Micro Devices, Inc.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a
4  * copy of this software and associated documentation files (the "Software"),
5  * to deal in the Software without restriction, including without limitation
6  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7  * and/or sell copies of the Software, and to permit persons to whom the
8  * Software is furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19  * OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors: AMD
22  *
23  */
24 
25 #include <string.h>
26 #include "common.h"
27 #include "vpe_priv.h"
28 #include "vpe10_dpp.h"
29 #include "color.h"
30 #include "vpe10/inc/vpe10_cm_common.h"
31 #include "hw_shared.h"
32 #include "reg_helper.h"
33 
34 #define CTX_BASE dpp
35 #define CTX      vpe10_dpp
36 
37 static struct dpp_funcs vpe10_dpp_funcs = {
38 
39     // cnv
40     .program_cnv            = vpe10_dpp_program_cnv,
41     .program_pre_dgam       = vpe10_dpp_cnv_program_pre_dgam,
42     .program_cnv_bias_scale = vpe10_dpp_program_cnv_bias_scale,
43     .program_alpha_keyer    = vpe10_dpp_cnv_program_alpha_keyer,
44     .program_crc            = vpe10_dpp_program_crc,
45 
46     // cm
47     .program_input_transfer_func = vpe10_dpp_program_input_transfer_func,
48     .program_gamut_remap         = vpe10_dpp_program_gamut_remap,
49     .program_post_csc            = vpe10_dpp_program_post_csc,
50     .set_hdr_multiplier          = vpe10_dpp_set_hdr_multiplier,
51 
52     // scaler
53     .get_optimal_number_of_taps  = vpe10_dpp_get_optimal_number_of_taps,
54     .dscl_calc_lb_num_partitions = vpe10_dscl_calc_lb_num_partitions,
55     .set_segment_scaler          = vpe10_dpp_set_segment_scaler,
56     .set_frame_scaler            = vpe10_dpp_set_frame_scaler,
57     .get_line_buffer_size        = vpe10_get_line_buffer_size,
58     .validate_number_of_taps     = vpe10_dpp_validate_number_of_taps,
59 };
60 
vpe10_construct_dpp(struct vpe_priv * vpe_priv,struct dpp * dpp)61 void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
62 {
63     dpp->vpe_priv = vpe_priv;
64     dpp->funcs    = &vpe10_dpp_funcs;
65 }
66 
vpe10_dpp_get_optimal_number_of_taps(struct dpp * dpp,struct scaler_data * scl_data,const struct vpe_scaling_taps * in_taps)67 bool vpe10_dpp_get_optimal_number_of_taps(
68     struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps)
69 {
70     struct vpe_priv *vpe_priv   = dpp->vpe_priv;
71     uint32_t         h_taps_min = 0, v_taps_min = 0;
72     /*
73      * Set default taps if none are provided
74      * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
75      * taps = 4 for upscaling
76      */
77     if (in_taps->h_taps > 8 || in_taps->v_taps > 8 || in_taps->h_taps_c > 8 ||
78         in_taps->v_taps_c > 8)
79         return false;
80 
81     if (vpe_fixpt_ceil(scl_data->ratios.horz) > 1)
82         h_taps_min = (uint32_t)max(4, min(2 * vpe_fixpt_ceil(scl_data->ratios.horz), 8));
83     else
84         h_taps_min = (uint32_t)4;
85 
86     if (in_taps->h_taps == 0) {
87         scl_data->taps.h_taps = h_taps_min;
88     } else {
89         if (in_taps->h_taps < h_taps_min)
90             return false;
91 
92         scl_data->taps.h_taps = in_taps->h_taps;
93     }
94 
95     if (vpe_fixpt_ceil(scl_data->ratios.vert) > 1)
96         v_taps_min =
97             (uint32_t)max(4, min(vpe_fixpt_ceil(vpe_fixpt_mul_int(scl_data->ratios.vert, 2)), 8));
98     else
99         v_taps_min = (uint32_t)4;
100 
101     if (in_taps->v_taps == 0) {
102         scl_data->taps.v_taps = v_taps_min;
103     } else {
104         if (in_taps->v_taps < v_taps_min)
105             return false;
106 
107         scl_data->taps.v_taps = in_taps->v_taps;
108     }
109 
110     if (in_taps->h_taps_c == 0) {
111         // default to 2 as mmd only uses bilinear for chroma
112         scl_data->taps.h_taps_c = (uint32_t)2;
113     } else
114         scl_data->taps.h_taps_c = in_taps->h_taps_c;
115 
116     if (in_taps->v_taps_c == 0) {
117         // default to 2 as mmd only uses bilinear for chroma
118         scl_data->taps.v_taps_c = (uint32_t)2;
119     } else
120         scl_data->taps.v_taps_c = in_taps->v_taps_c;
121 
122     /* taps can be either 1 or an even number */
123     if (scl_data->taps.h_taps % 2 && scl_data->taps.h_taps != 1)
124         scl_data->taps.h_taps++;
125 
126     if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
127         scl_data->taps.v_taps++;
128 
129     if (scl_data->taps.h_taps_c % 2 && scl_data->taps.h_taps_c != 1)
130         scl_data->taps.h_taps_c++;
131 
132     if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
133         scl_data->taps.v_taps_c++;
134 
135     // bypass scaler if all ratios are 1
136     if (IDENTITY_RATIO(scl_data->ratios.horz))
137         scl_data->taps.h_taps = 1;
138     if (IDENTITY_RATIO(scl_data->ratios.vert))
139         scl_data->taps.v_taps = 1;
140 
141     return true;
142 }
143 
vpe10_dscl_calc_lb_num_partitions(const struct scaler_data * scl_data,enum lb_memory_config lb_config,uint32_t * num_part_y,uint32_t * num_part_c)144 void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
145     enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c)
146 {
147     uint32_t memory_line_size_y, memory_line_size_c, memory_line_size_a, lb_memory_size,
148         lb_memory_size_c, lb_memory_size_a, num_partitions_a;
149 
150     uint32_t line_size   = scl_data->viewport.width < scl_data->recout.width
151                                ? scl_data->viewport.width
152                                : scl_data->recout.width;
153     uint32_t line_size_c = scl_data->viewport_c.width < scl_data->recout.width
154                                ? scl_data->viewport_c.width
155                                : scl_data->recout.width;
156 
157     if (line_size == 0)
158         line_size = 1;
159 
160     if (line_size_c == 0)
161         line_size_c = 1;
162 
163     memory_line_size_y = (line_size + 5) / 6;   /* +5 to ceil */
164     memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */
165     memory_line_size_a = (line_size + 5) / 6;   /* +5 to ceil */
166 
167     // only has 1-piece lb config in vpe1
168     lb_memory_size   = 696;
169     lb_memory_size_c = 696;
170     lb_memory_size_a = 696;
171 
172     *num_part_y      = lb_memory_size / memory_line_size_y;
173     *num_part_c      = lb_memory_size_c / memory_line_size_c;
174     num_partitions_a = lb_memory_size_a / memory_line_size_a;
175 
176     if (scl_data->lb_params.alpha_en && (num_partitions_a < *num_part_y))
177         *num_part_y = num_partitions_a;
178 
179     if (*num_part_y > 12)
180         *num_part_y = 12;
181     if (*num_part_c > 12)
182         *num_part_c = 12;
183 }
184 
185 /* Not used as we don't enable prealpha dealpha currently
186  * Can skip for optimize performance and use default val
187  */
vpe10_dpp_program_prealpha_dealpha(struct dpp * dpp)188 static void vpe10_dpp_program_prealpha_dealpha(struct dpp *dpp)
189 {
190     uint32_t dealpha_en = 0, dealpha_ablnd_en = 0;
191     uint32_t realpha_en = 0, realpha_ablnd_en = 0;
192     uint32_t program_prealpha_dealpha = 0;
193     PROGRAM_ENTRY();
194 
195     if (program_prealpha_dealpha) {
196         dealpha_en = 1;
197         realpha_en = 1;
198     }
199     REG_SET_2(
200         VPCNVC_PRE_DEALPHA, 0, PRE_DEALPHA_EN, dealpha_en, PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en);
201     REG_SET_2(
202         VPCNVC_PRE_REALPHA, 0, PRE_REALPHA_EN, realpha_en, PRE_REALPHA_ABLND_EN, realpha_ablnd_en);
203 }
204 
205 /* Not used as we don't have special 2bit LUt currently
206  * Can skip for optimize performance and use default val
207  */
vpe10_dpp_program_alpha_2bit_lut(struct dpp * dpp,struct cnv_alpha_2bit_lut * alpha_2bit_lut)208 static void vpe10_dpp_program_alpha_2bit_lut(
209     struct dpp *dpp, struct cnv_alpha_2bit_lut *alpha_2bit_lut)
210 {
211     PROGRAM_ENTRY();
212 
213     if (alpha_2bit_lut != NULL) {
214         REG_SET_4(VPCNVC_ALPHA_2BIT_LUT, 0, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0, ALPHA_2BIT_LUT1,
215             alpha_2bit_lut->lut1, ALPHA_2BIT_LUT2, alpha_2bit_lut->lut2, ALPHA_2BIT_LUT3,
216             alpha_2bit_lut->lut3);
217     } else { // restore to default
218         REG_SET_DEFAULT(VPCNVC_ALPHA_2BIT_LUT);
219     }
220 }
221 
vpe10_dpp_program_cnv(struct dpp * dpp,enum vpe_surface_pixel_format format,enum vpe_expansion_mode mode)222 void vpe10_dpp_program_cnv(
223     struct dpp *dpp, enum vpe_surface_pixel_format format, enum vpe_expansion_mode mode)
224 {
225     uint32_t alpha_en     = 1;
226     uint32_t pixel_format = 0;
227     uint32_t hw_expansion_mode = 0;
228 
229     PROGRAM_ENTRY();
230 
231     switch (mode) {
232     case VPE_EXPANSION_MODE_DYNAMIC:
233         hw_expansion_mode = 0;
234         break;
235     case VPE_EXPANSION_MODE_ZERO:
236         hw_expansion_mode = 1;
237         break;
238     default:
239         VPE_ASSERT(0);
240         break;
241     }
242 
243     switch (format) {
244     case VPE_SURFACE_PIXEL_FORMAT_GRPH_XRGB8888:
245     case VPE_SURFACE_PIXEL_FORMAT_GRPH_XBGR8888:
246         alpha_en = 0;
247     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
248     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
249         pixel_format = 8;
250         break;
251     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBX8888:
252     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRX8888:
253         alpha_en = 0;
254     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA8888:
255     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA8888:
256         pixel_format = 9;
257         break;
258     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
259     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
260         pixel_format = 10;
261         break;
262     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA1010102:
263     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA1010102:
264         pixel_format = 11;
265         break;
266     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
267     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCbCr8888:
268         pixel_format = 12;
269         break;
270     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
271         pixel_format = 64;
272         alpha_en     = 0;
273         break;
274     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
275         pixel_format = 65;
276         alpha_en     = 0;
277         break;
278     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
279         pixel_format = 66;
280         alpha_en     = 0;
281         break;
282     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
283         pixel_format = 67;
284         alpha_en     = 0;
285         break;
286     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
287         pixel_format = 22;
288         break;
289     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
290     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
291         pixel_format = 24;
292         break;
293     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA16161616F:
294     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA16161616F:
295         pixel_format = 25;
296         break;
297     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
298         pixel_format = 114;
299         break;
300     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
301         pixel_format = 115;
302         break;
303     default:
304         break;
305     }
306 
307     REG_SET(VPCNVC_SURFACE_PIXEL_FORMAT, 0, VPCNVC_SURFACE_PIXEL_FORMAT, pixel_format);
308 
309     REG_SET_7(VPCNVC_FORMAT_CONTROL, 0, FORMAT_EXPANSION_MODE, hw_expansion_mode, FORMAT_CNV16, 0,
310         FORMAT_CONTROL__ALPHA_EN, alpha_en, VPCNVC_BYPASS, dpp->vpe_priv->init.debug.vpcnvc_bypass,
311         VPCNVC_BYPASS_MSB_ALIGN, 0, CLAMP_POSITIVE, 0, CLAMP_POSITIVE_C, 0);
312 }
313 
vpe10_dpp_program_cnv_bias_scale(struct dpp * dpp,struct bias_and_scale * bias_and_scale)314 void vpe10_dpp_program_cnv_bias_scale(struct dpp *dpp, struct bias_and_scale *bias_and_scale)
315 {
316     PROGRAM_ENTRY();
317 
318     REG_SET(VPCNVC_FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, bias_and_scale->bias_red);
319     REG_SET(VPCNVC_FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, bias_and_scale->bias_green);
320     REG_SET(VPCNVC_FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, bias_and_scale->bias_blue);
321 
322     REG_SET(VPCNVC_FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, bias_and_scale->scale_red);
323     REG_SET(VPCNVC_FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, bias_and_scale->scale_green);
324     REG_SET(VPCNVC_FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, bias_and_scale->scale_blue);
325 }
326 
vpe10_dpp_cnv_program_pre_dgam(struct dpp * dpp,enum color_transfer_func tr)327 void vpe10_dpp_cnv_program_pre_dgam(struct dpp *dpp, enum color_transfer_func tr)
328 {
329     int pre_degam_en          = 1;
330     int degamma_lut_selection = 0;
331 
332     PROGRAM_ENTRY();
333 
334     switch (tr) {
335     case TRANSFER_FUNC_LINEAR_0_125:
336         pre_degam_en = 0; // bypass
337         break;
338     case TRANSFER_FUNC_SRGB:
339         degamma_lut_selection = 0;
340         break;
341     case TRANSFER_FUNC_BT709:
342         degamma_lut_selection = 4;
343         break;
344     case TRANSFER_FUNC_PQ2084:
345         degamma_lut_selection = 5;
346         break;
347     default:
348         pre_degam_en = 0;
349         break;
350     }
351 
352     REG_SET_2(
353         VPCNVC_PRE_DEGAM, 0, PRE_DEGAM_MODE, pre_degam_en, PRE_DEGAM_SELECT, degamma_lut_selection);
354 }
355 
vpe10_dpp_cnv_program_alpha_keyer(struct dpp * dpp,struct cnv_color_keyer_params * color_keyer)356 void vpe10_dpp_cnv_program_alpha_keyer(struct dpp *dpp, struct cnv_color_keyer_params *color_keyer)
357 {
358     PROGRAM_ENTRY();
359 
360     REG_SET_2(VPCNVC_COLOR_KEYER_CONTROL, 0, COLOR_KEYER_EN, color_keyer->color_keyer_en,
361         COLOR_KEYER_MODE, color_keyer->color_keyer_mode);
362 
363     REG_SET_2(VPCNVC_COLOR_KEYER_ALPHA, 0, COLOR_KEYER_ALPHA_LOW,
364         color_keyer->color_keyer_alpha_low, COLOR_KEYER_ALPHA_HIGH,
365         color_keyer->color_keyer_alpha_high);
366 
367     REG_SET_2(VPCNVC_COLOR_KEYER_RED, 0, COLOR_KEYER_RED_LOW, color_keyer->color_keyer_red_low,
368         COLOR_KEYER_RED_HIGH, color_keyer->color_keyer_red_high);
369 
370     REG_SET_2(VPCNVC_COLOR_KEYER_GREEN, 0, COLOR_KEYER_GREEN_LOW,
371         color_keyer->color_keyer_green_low, COLOR_KEYER_GREEN_HIGH,
372         color_keyer->color_keyer_green_high);
373 
374     REG_SET_2(VPCNVC_COLOR_KEYER_BLUE, 0, COLOR_KEYER_BLUE_LOW, color_keyer->color_keyer_blue_low,
375         COLOR_KEYER_BLUE_HIGH, color_keyer->color_keyer_blue_high);
376 }
377 
vpe10_get_line_buffer_size()378 uint32_t vpe10_get_line_buffer_size()
379 {
380     return MAX_LINE_SIZE * MAX_LINE_CNT;
381 }
382 
vpe10_dpp_validate_number_of_taps(struct dpp * dpp,struct scaler_data * scl_data)383 bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, struct scaler_data *scl_data)
384 {
385     uint32_t num_part_y, num_part_c;
386     uint32_t max_taps_y, max_taps_c;
387     uint32_t min_taps_y, min_taps_c;
388 
389     /*Ensure we can support the requested number of vtaps*/
390     min_taps_y = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert);
391     min_taps_c = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c);
392 
393     dpp->funcs->dscl_calc_lb_num_partitions(scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c);
394 
395     /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */
396     if (vpe_fixpt_ceil(scl_data->ratios.vert) > 2)
397         max_taps_y = num_part_y - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert) - 2);
398     else
399         max_taps_y = num_part_y;
400 
401     if (vpe_fixpt_ceil(scl_data->ratios.vert_c) > 2)
402         max_taps_c = num_part_c - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c) - 2);
403     else
404         max_taps_c = num_part_c;
405 
406     if (max_taps_y < min_taps_y)
407         return false;
408     else if (max_taps_c < min_taps_c)
409         return false;
410 
411     if (scl_data->taps.v_taps > max_taps_y)
412         scl_data->taps.v_taps = max_taps_y;
413 
414     if (scl_data->taps.v_taps_c > max_taps_c)
415         scl_data->taps.v_taps_c = max_taps_c;
416 
417     if (IDENTITY_RATIO(scl_data->ratios.vert))
418         scl_data->taps.v_taps = 1;
419 
420     if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
421         scl_data->taps.v_taps++;
422 
423     if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
424         scl_data->taps.v_taps_c++;
425 
426     return true;
427 }
428 
vpe10_dpp_program_crc(struct dpp * dpp,bool enable)429 void vpe10_dpp_program_crc(struct dpp *dpp, bool enable)
430 {
431     PROGRAM_ENTRY();
432     REG_UPDATE(VPDPP_CRC_CTRL, VPDPP_CRC_EN, enable);
433 }
434 
435