1 /* Copyright 2022 Advanced Micro Devices, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 * OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors: AMD
22 *
23 */
24
25 #include "vpe_priv.h"
26 #include "vpe10_dpp.h"
27
/*
 * Context names for this translation unit.
 * NOTE(review): presumably consumed by the register helper macros
 * (PROGRAM_ENTRY / REG_SET*) - confirm against their definitions.
 */
#define CTX      vpe10_dpp
#define CTX_BASE dpp

/* Polyphase filter geometry: phase count and maximum tap counts per direction. */
#define NUM_PHASES    64
#define HORZ_MAX_TAPS 8
#define VERT_MAX_TAPS 8

/* Value programmed into the LB_MAX_PARTITIONS field of VPLB_MEMORY_CTRL. */
#define LB_MAX_PARTITION 12
36
vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)37 bool vpe10_dpp_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
38 {
39 return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
40 format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END;
41 }
42
dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)43 static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)
44 {
45 return (format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
46 format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END);
47 }
48
vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data * data)49 enum vpe10_dscl_mode_sel vpe10_dpp_dscl_get_dscl_mode(const struct scaler_data *data)
50 {
51
52 const long long one = vpe_fixpt_one.value;
53
54 if (data->ratios.horz.value == one && data->ratios.vert.value == one &&
55 data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
56 return DSCL_MODE_DSCL_BYPASS;
57
58 if (!vpe10_dpp_dscl_is_ycbcr(data->format))
59 return DSCL_MODE_SCALING_444_RGB_ENABLE;
60
61 if (!dpp1_dscl_is_video_subsampled(data->format))
62 return DSCL_MODE_SCALING_444_YCBCR_ENABLE;
63
64 if (data->ratios.horz.value == one && data->ratios.vert.value == one)
65 return DSCL_MODE_SCALING_420_LUMA_BYPASS;
66
67 return DSCL_MODE_SCALING_420_YCBCR_ENABLE;
68 }
69
vpe10_dpp_dscl_set_dscl_mode(struct dpp * dpp,enum vpe10_dscl_mode_sel dscl_mode)70 void vpe10_dpp_dscl_set_dscl_mode(struct dpp *dpp, enum vpe10_dscl_mode_sel dscl_mode)
71 {
72
73 PROGRAM_ENTRY();
74
75 REG_SET(VPDSCL_MODE, 0, VPDSCL_MODE, dscl_mode);
76 }
77
dpp1_dscl_set_recout(struct dpp * dpp,const struct vpe_rect * recout)78 static void dpp1_dscl_set_recout(struct dpp *dpp, const struct vpe_rect *recout)
79 {
80
81 PROGRAM_ENTRY();
82
83 REG_SET_2(VPDSCL_RECOUT_START, 0, RECOUT_START_X, recout->x, RECOUT_START_Y, recout->y);
84
85 REG_SET_2(VPDSCL_RECOUT_SIZE, 0, RECOUT_WIDTH, recout->width, RECOUT_HEIGHT, recout->height);
86 }
87
dpp1_dscl_set_mpc_size(struct dpp * dpp,const struct scaler_data * scl_data)88 static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *scl_data)
89 {
90
91 PROGRAM_ENTRY();
92
93 REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active);
94 }
95
vpe10_dpp_dscl_set_h_blank(struct dpp * dpp,uint16_t start,uint16_t end)96 void vpe10_dpp_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
97 {
98
99 PROGRAM_ENTRY();
100 REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start);
101 }
102
vpe10_dpp_dscl_set_v_blank(struct dpp * dpp,uint16_t start,uint16_t end)103 void vpe10_dpp_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
104 {
105
106 PROGRAM_ENTRY();
107 REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start);
108 }
109
vpe10_dpp_dscl_set_taps(struct dpp * dpp,const struct scaler_data * scl_data)110 void vpe10_dpp_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
111 {
112
113 PROGRAM_ENTRY();
114
115 REG_SET_4(VPDSCL_TAP_CONTROL, 0, SCL_V_NUM_TAPS, scl_data->taps.v_taps - 1, SCL_H_NUM_TAPS,
116 scl_data->taps.h_taps - 1, SCL_V_NUM_TAPS_C, scl_data->taps.v_taps_c - 1, SCL_H_NUM_TAPS_C,
117 scl_data->taps.h_taps_c - 1);
118 }
119
/*
 * Look up the built-in 64-phase coefficient table for a given tap count.
 *
 * taps:  number of filter taps; 8, 6, 4 and 2 have tables.
 * ratio: scaling ratio forwarded to the 8/6/4-tap lookups (the 2-tap
 *        bilinear lookup takes no ratio).
 *
 * Returns the coefficient table, or NULL when taps is 1 or any unsupported
 * value.
 */
static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
{
    switch (taps) {
    case 8:
        return vpe_get_filter_8tap_64p(ratio);
    case 6:
        return vpe_get_filter_6tap_64p(ratio);
    case 4:
        return vpe_get_filter_4tap_64p(ratio);
    case 2:
        return vpe_get_2tap_bilinear_64p();
    case 1:
        /* single tap: no coefficient table */
        return NULL;
    default:
        /* should never happen, bug */
        return NULL;
    }
}
137
vpe10_dpp_dscl_set_scaler_filter(struct dpp * dpp,uint32_t taps,enum vpe10_coef_filter_type_sel filter_type,const uint16_t * filter)138 void vpe10_dpp_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
139 enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter)
140 {
141 const int tap_pairs = (taps + 1) / 2;
142 int phase;
143 int pair;
144 uint16_t odd_coef, even_coef;
145
146 PROGRAM_ENTRY();
147
148 REG_SET_3(VPDSCL_COEF_RAM_TAP_SELECT, 0, SCL_COEF_RAM_TAP_PAIR_IDX, 0, SCL_COEF_RAM_PHASE, 0,
149 SCL_COEF_RAM_FILTER_TYPE, filter_type);
150
151 for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
152 for (pair = 0; pair < tap_pairs; pair++) {
153 even_coef = filter[phase * (int)taps + 2 * pair];
154 if ((pair * 2 + 1) < (int)taps)
155 odd_coef = filter[phase * (int)taps + 2 * pair + 1];
156 else
157 odd_coef = 0;
158
159 REG_SET_4(VPDSCL_COEF_RAM_TAP_DATA, 0,
160 /* Even tap coefficient (bits 1:0 fixed to 0) */
161 SCL_COEF_RAM_EVEN_TAP_COEF, even_coef,
162 /* Write/read control for even coefficient */
163 SCL_COEF_RAM_EVEN_TAP_COEF_EN, 1,
164 /* Odd tap coefficient (bits 1:0 fixed to 0) */
165 SCL_COEF_RAM_ODD_TAP_COEF, odd_coef,
166 /* Write/read control for odd coefficient */
167 SCL_COEF_RAM_ODD_TAP_COEF_EN, 1);
168 }
169 }
170 }
171
vpe10_dpp_dscl_set_scl_filter(struct dpp * dpp,const struct scaler_data * scl_data,enum vpe10_dscl_mode_sel scl_mode,bool chroma_coef_mode)172 void vpe10_dpp_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
173 enum vpe10_dscl_mode_sel scl_mode, bool chroma_coef_mode)
174 {
175
176 const uint16_t *filter_h = NULL;
177 const uint16_t *filter_v = NULL;
178 const uint16_t *filter_h_c = NULL;
179 const uint16_t *filter_v_c = NULL;
180
181 PROGRAM_ENTRY();
182
183 if (scl_data->polyphase_filter_coeffs == 0) /*no externally provided set of coeffs and taps*/
184 {
185 filter_h = (uint16_t *)dpp1_dscl_get_filter_coeffs_64p(
186 (int)scl_data->taps.h_taps, scl_data->ratios.horz);
187 filter_v =
188 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps, scl_data->ratios.vert);
189 } else {
190 filter_h = (const uint16_t *)&scl_data->polyphase_filter_coeffs->horiz_polyphase_coeffs;
191 filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs;
192 }
193 if (filter_h != NULL)
194 vpe10_dpp_dscl_set_scaler_filter(
195 dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h);
196
197 if (filter_v != NULL)
198 vpe10_dpp_dscl_set_scaler_filter(
199 dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v);
200
201 if (chroma_coef_mode) {
202
203 filter_h_c =
204 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.h_taps_c, scl_data->ratios.horz_c);
205 filter_v_c =
206 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
207
208 if (filter_h_c != NULL)
209 vpe10_dpp_dscl_set_scaler_filter(
210 dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
211
212 if (filter_v_c != NULL)
213 vpe10_dpp_dscl_set_scaler_filter(
214 dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
215 }
216
217 REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode);
218 }
219
vpe10_dpp_dscl_set_lb(struct dpp * dpp,const struct line_buffer_params * lb_params,enum lb_memory_config mem_size_config)220 void vpe10_dpp_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
221 enum lb_memory_config mem_size_config)
222 {
223
224 PROGRAM_ENTRY();
225
226 REG_SET(VPLB_DATA_FORMAT, 0, ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
227
228 REG_SET_2(
229 VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION);
230 }
231
vpe10_dpp_dscl_set_scale_ratio(struct dpp * dpp,const struct scaler_data * data)232 void vpe10_dpp_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
233 {
234
235 PROGRAM_ENTRY();
236
237 REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO, 0, SCL_H_SCALE_RATIO,
238 vpe_fixpt_u3d19(data->ratios.horz) << 5);
239
240 REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO, 0, SCL_V_SCALE_RATIO,
241 vpe_fixpt_u3d19(data->ratios.vert) << 5);
242
243 REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO_C, 0, SCL_H_SCALE_RATIO_C,
244 vpe_fixpt_u3d19(data->ratios.horz_c) << 5);
245
246 REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO_C, 0, SCL_V_SCALE_RATIO_C,
247 vpe_fixpt_u3d19(data->ratios.vert_c) << 5);
248 }
249
dpp1_dscl_set_scaler_position(struct dpp * dpp,const struct scaler_data * data)250 static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_data *data)
251 {
252 uint32_t init_frac = 0;
253 uint32_t init_int = 0;
254
255 PROGRAM_ENTRY();
256
257 /*
258 * 0.24 format for fraction, first five bits zeroed
259 */
260 init_frac = vpe_fixpt_u0d19(data->inits.h) << 5;
261 init_int = (uint32_t)vpe_fixpt_floor(data->inits.h);
262 REG_SET_2(VPDSCL_HORZ_FILTER_INIT, 0, SCL_H_INIT_FRAC, init_frac, SCL_H_INIT_INT, init_int);
263
264 init_frac = vpe_fixpt_u0d19(data->inits.h_c) << 5;
265 init_int = (uint32_t)vpe_fixpt_floor(data->inits.h_c);
266 REG_SET_2(
267 VPDSCL_HORZ_FILTER_INIT_C, 0, SCL_H_INIT_FRAC_C, init_frac, SCL_H_INIT_INT_C, init_int);
268
269 init_frac = vpe_fixpt_u0d19(data->inits.v) << 5;
270 init_int = (uint32_t)vpe_fixpt_floor(data->inits.v);
271 REG_SET_2(VPDSCL_VERT_FILTER_INIT, 0, SCL_V_INIT_FRAC, init_frac, SCL_V_INIT_INT, init_int);
272
273 init_frac = vpe_fixpt_u0d19(data->inits.v_c) << 5;
274 init_int = (uint32_t)vpe_fixpt_floor(data->inits.v_c);
275 REG_SET_2(
276 VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int);
277 }
278
vpe10_dpp_power_on_dscl(struct dpp * dpp,bool power_on)279 void vpe10_dpp_power_on_dscl(struct dpp *dpp, bool power_on)
280 {
281 PROGRAM_ENTRY();
282
283 if (dpp->vpe_priv->init.debug.enable_mem_low_power.bits.dscl) {
284 if (power_on) {
285 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
286 LUT_MEM_PWR_FORCE, 0);
287
288 // introduce a delay by dummy set
289 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
290 LUT_MEM_PWR_FORCE, 0);
291
292 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
293 LUT_MEM_PWR_FORCE, 0);
294 } else {
295 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
296 LUT_MEM_PWR_FORCE, 3);
297 }
298 } else {
299 if (power_on) {
300 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 1,
301 LUT_MEM_PWR_FORCE, 0);
302 } else {
303 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
304 LUT_MEM_PWR_FORCE, 0);
305 }
306 }
307 }
308
vpe10_dpp_set_segment_scaler(struct dpp * dpp,const struct scaler_data * scl_data)309 void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
310 {
311
312 enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
313
314 dpp1_dscl_set_recout(dpp, &scl_data->recout);
315 dpp1_dscl_set_mpc_size(dpp, scl_data);
316
317 if (dscl_mode == DSCL_MODE_DSCL_BYPASS)
318 return;
319
320 dpp1_dscl_set_scaler_position(dpp, scl_data);
321 }
322
vpe10_dpp_set_frame_scaler(struct dpp * dpp,const struct scaler_data * scl_data)323 void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
324 {
325
326 enum vpe10_dscl_mode_sel dscl_mode = vpe10_dpp_dscl_get_dscl_mode(scl_data);
327 bool ycbcr = vpe10_dpp_dscl_is_ycbcr(scl_data->format);
328
329 vpe10_dpp_dscl_set_h_blank(dpp, 1, 0);
330 vpe10_dpp_dscl_set_v_blank(dpp, 1, 0);
331
332 if (dscl_mode != DSCL_MODE_DSCL_BYPASS)
333 vpe10_dpp_power_on_dscl(dpp, true);
334
335 vpe10_dpp_dscl_set_dscl_mode(dpp, dscl_mode);
336
337 if (dscl_mode == DSCL_MODE_DSCL_BYPASS) {
338 vpe10_dpp_power_on_dscl(dpp, false);
339 return;
340 }
341
342 vpe10_dpp_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
343 vpe10_dpp_dscl_set_scale_ratio(dpp, scl_data);
344 vpe10_dpp_dscl_set_taps(dpp, scl_data);
345 vpe10_dpp_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
346 }
347