1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright 2019 NXP.
4 *
5 * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com>
6 */
7
8 #include <linux/device.h>
9 #include <linux/slab.h>
10
11 #include "dcss-dev.h"
12
/*
 * Scaler register offsets and bit fields.
 *
 * Register offsets are added to a channel's base_ofs by dcss_scaler_write().
 * Bit-field macros are listed right after the register they appear to belong
 * to; for fields that the code below never uses, that grouping is taken from
 * the ordering of this list only - confirm against the reference manual.
 */

/* control register; dcss_scaler_ch_enable() programs SCALER_EN | REPEAT_EN */
#define DCSS_SCALER_CTRL		0x00
#define SCALER_EN			BIT(0)
#define REPEAT_EN			BIT(4)
#define SCALE2MEM_EN			BIT(8)
#define MEM2OFIFO_EN			BIT(12)
/* output FIFO control (not programmed by the code visible here) */
#define DCSS_SCALER_OFIFO_CTRL		0x04
#define OFIFO_LOW_THRES_POS		0
#define OFIFO_LOW_THRES_MASK		GENMASK(9, 0)
#define OFIFO_HIGH_THRES_POS		16
#define OFIFO_HIGH_THRES_MASK		GENMASK(25, 16)
#define UNDERRUN_DETECT_CLR		BIT(26)
#define LOW_THRES_DETECT_CLR		BIT(27)
#define HIGH_THRES_DETECT_CLR		BIT(28)
#define UNDERRUN_DETECT_EN		BIT(29)
#define LOW_THRES_DETECT_EN		BIT(30)
#define HIGH_THRES_DETECT_EN		BIT(31)
/* source data control; shadowed in dcss_scaler_ch::sdata_ctrl */
#define DCSS_SCALER_SDATA_CTRL		0x08
#define YUV_EN				BIT(0)
#define RTRAM_8LINES			BIT(1)
#define Y_UV_BYTE_SWAP			BIT(4)
#define A2R10G10B10_FORMAT_POS		8
#define A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)
/* luma/chroma bit depth; written by dcss_scaler_bit_depth_set() */
#define DCSS_SCALER_BIT_DEPTH		0x0C
#define LUM_BIT_DEPTH_POS		0
#define LUM_BIT_DEPTH_MASK		GENMASK(1, 0)
#define CHR_BIT_DEPTH_POS		4
#define CHR_BIT_DEPTH_MASK		GENMASK(5, 4)
/* source/destination buffer format; take an enum buffer_format value */
#define DCSS_SCALER_SRC_FORMAT		0x10
#define DCSS_SCALER_DST_FORMAT		0x14
#define FORMAT_MASK			GENMASK(1, 0)
/* resolutions; each register packs (height - 1) and (width - 1) */
#define DCSS_SCALER_SRC_LUM_RES		0x18
#define DCSS_SCALER_SRC_CHR_RES		0x1C
#define DCSS_SCALER_DST_LUM_RES		0x20
#define DCSS_SCALER_DST_CHR_RES		0x24
#define WIDTH_POS			0
#define WIDTH_MASK			GENMASK(11, 0)
#define HEIGHT_POS			16
#define HEIGHT_MASK			GENMASK(27, 16)
/* start phase and phase increment, luma then chroma */
#define DCSS_SCALER_V_LUM_START		0x48
#define V_START_MASK			GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC		0x4C
#define V_INC_MASK			GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START		0x50
#define H_START_MASK			GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC		0x54
#define H_INC_MASK			GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START		0x58
#define DCSS_SCALER_V_CHR_INC		0x5C
#define DCSS_SCALER_H_CHR_START		0x60
#define DCSS_SCALER_H_CHR_INC		0x64
/* base offsets of the four filter coefficient tables */
#define DCSS_SCALER_COEF_VLUM		0x80
#define DCSS_SCALER_COEF_HLUM		0x140
#define DCSS_SCALER_COEF_VCHR		0x200
#define DCSS_SCALER_COEF_HCHR		0x300
67
/* Per-channel scaler state. */
struct dcss_scaler_ch {
	/* ioremap()'d channel registers, set up in dcss_scaler_ch_init_all() */
	void __iomem *base_reg;
	/* channel base (scaler_base + index * 0x400) used for ctxld writes */
	u32 base_ofs;
	/* back pointer to the owning scaler */
	struct dcss_scaler *scl;

	/* value written to DCSS_SCALER_SDATA_CTRL when the channel is enabled */
	u32 sdata_ctrl;
	/* value to be written to DCSS_SCALER_CTRL by dcss_scaler_write_sclctrl() */
	u32 scaler_ctrl;

	/* set when scaler_ctrl changed and still has to be written out */
	bool scaler_ctrl_chgd;

	/* chroma start phases saved by dcss_scaler_fractions_set() */
	u32 c_vstart;
	u32 c_hstart;
};
81
/* Scaler block state: three channels sharing one context loader. */
struct dcss_scaler {
	/* device used for error reporting (copied from the dcss device) */
	struct device *dev;

	/* context loader through which register writes are issued */
	struct dcss_ctxld *ctxld;
	/* context id passed along with every ctxld write */
	u32 ctx_id;

	struct dcss_scaler_ch ch[3];
};
90
/* scaler coefficients generator */
/* PSC_FRAC_BITS/PSC_FRAC_SCALE are not referenced by the code visible here */
#define PSC_FRAC_BITS			30
#define PSC_FRAC_SCALE			BIT(PSC_FRAC_BITS)
/* a scanner position splits into tap index (>> 4) and phase (& 15) */
#define PSC_BITS_FOR_PHASE		4
#define PSC_NUM_PHASES			16
/* rows actually generated; the remaining phases are programmed mirrored */
#define PSC_STORED_PHASES		(PSC_NUM_PHASES / 2 + 1)
/* columns of a coefficient table row, and tap count of the 5-tap variant */
#define PSC_NUM_TAPS			7
#define PSC_NUM_TAPS_RGBA		5
/* each phase is normalized so that its taps sum to about 1 << 10 */
#define PSC_COEFF_PRECISION		10
/* fractional bits of the chroma start phase offsets */
#define PSC_PHASE_FRACTION_BITS		13
#define PSC_PHASE_MASK			(PSC_NUM_PHASES - 1)
/* fractional bits of the fixed-point format used by mult_q()/div_q() */
#define PSC_Q_FRACTION			19
/* 0.5 in that fixed-point format */
#define PSC_Q_ROUND_OFFSET		(1 << (PSC_Q_FRACTION - 1))
104
105 /**
106 * mult_q() - Performs fixed-point multiplication.
107 * @A: multiplier
108 * @B: multiplicand
109 */
mult_q(int A,int B)110 static int mult_q(int A, int B)
111 {
112 int result;
113 s64 temp;
114
115 temp = (int64_t)A * (int64_t)B;
116 temp += PSC_Q_ROUND_OFFSET;
117 result = (int)(temp >> PSC_Q_FRACTION);
118 return result;
119 }
120
121 /**
122 * div_q() - Performs fixed-point division.
123 * @A: dividend
124 * @B: divisor
125 */
div_q(int A,int B)126 static int div_q(int A, int B)
127 {
128 int result;
129 s64 temp;
130
131 temp = (int64_t)A << PSC_Q_FRACTION;
132 if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0))
133 temp += B / 2;
134 else
135 temp -= B / 2;
136
137 result = (int)(temp / B);
138 return result;
139 }
140
141 /**
142 * exp_approx_q() - Compute approximation to exp(x) function using Taylor
143 * series.
144 * @x: fixed-point argument of exp function
145 */
exp_approx_q(int x)146 static int exp_approx_q(int x)
147 {
148 int sum = 1 << PSC_Q_FRACTION;
149 int term = 1 << PSC_Q_FRACTION;
150
151 term = mult_q(term, div_q(x, 1 << PSC_Q_FRACTION));
152 sum += term;
153 term = mult_q(term, div_q(x, 2 << PSC_Q_FRACTION));
154 sum += term;
155 term = mult_q(term, div_q(x, 3 << PSC_Q_FRACTION));
156 sum += term;
157 term = mult_q(term, div_q(x, 4 << PSC_Q_FRACTION));
158 sum += term;
159
160 return sum;
161 }
162
163 /**
164 * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter.
165 * @fc_q: fixed-point cutoff frequency normalized to range [0, 1]
166 * @use_5_taps: indicates whether to use 5 taps or 7 taps
167 * @coef: output filter coefficients
168 */
dcss_scaler_gaussian_filter(int fc_q,bool use_5_taps,bool phase0_identity,int coef[][PSC_NUM_TAPS])169 static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
170 bool phase0_identity,
171 int coef[][PSC_NUM_TAPS])
172 {
173 int sigma_q, g0_q, g1_q, g2_q;
174 int tap_cnt1, tap_cnt2, tap_idx, phase_cnt;
175 int mid;
176 int phase;
177 int i;
178 int taps;
179
180 if (use_5_taps)
181 for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
182 coef[phase][0] = 0;
183 coef[phase][PSC_NUM_TAPS - 1] = 0;
184 }
185
186 /* seed coefficient scanner */
187 taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS;
188 mid = (PSC_NUM_PHASES * taps) / 2 - 1;
189 phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2;
190 tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
191 tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2;
192
193 /* seed gaussian filter generator */
194 sigma_q = div_q(PSC_Q_ROUND_OFFSET, fc_q);
195 g0_q = 1 << PSC_Q_FRACTION;
196 g1_q = exp_approx_q(div_q(-PSC_Q_ROUND_OFFSET,
197 mult_q(sigma_q, sigma_q)));
198 g2_q = mult_q(g1_q, g1_q);
199 coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q;
200
201 for (i = 0; i < mid; i++) {
202 phase_cnt++;
203 tap_cnt1--;
204 tap_cnt2++;
205
206 g0_q = mult_q(g0_q, g1_q);
207 g1_q = mult_q(g1_q, g2_q);
208
209 if ((phase_cnt & PSC_PHASE_MASK) <= 8) {
210 tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE;
211 coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q;
212 }
213 if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) {
214 tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE;
215 coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q;
216 }
217 }
218
219 phase_cnt++;
220 tap_cnt1--;
221 coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0;
222
223 /* override phase 0 with identity filter if specified */
224 if (phase0_identity)
225 for (i = 0; i < PSC_NUM_TAPS; i++)
226 coef[0][i] = i == (PSC_NUM_TAPS >> 1) ?
227 (1 << PSC_COEFF_PRECISION) : 0;
228
229 /* normalize coef */
230 for (phase = 0; phase < PSC_STORED_PHASES; phase++) {
231 int sum = 0;
232 s64 ll_temp;
233
234 for (i = 0; i < PSC_NUM_TAPS; i++)
235 sum += coef[phase][i];
236 for (i = 0; i < PSC_NUM_TAPS; i++) {
237 ll_temp = coef[phase][i];
238 ll_temp <<= PSC_COEFF_PRECISION;
239 ll_temp += sum >> 1;
240 ll_temp /= sum;
241 coef[phase][i] = (int)ll_temp;
242 }
243 }
244 }
245
246 /**
247 * dcss_scaler_filter_design() - Compute filter coefficients using
248 * Gaussian filter.
249 * @src_length: length of input
250 * @dst_length: length of output
251 * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps
252 * @coef: output coefficients
253 */
dcss_scaler_filter_design(int src_length,int dst_length,bool use_5_taps,bool phase0_identity,int coef[][PSC_NUM_TAPS])254 static void dcss_scaler_filter_design(int src_length, int dst_length,
255 bool use_5_taps, bool phase0_identity,
256 int coef[][PSC_NUM_TAPS])
257 {
258 int fc_q;
259
260 /* compute cutoff frequency */
261 if (dst_length >= src_length)
262 fc_q = div_q(1, PSC_NUM_PHASES);
263 else
264 fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
265
266 /* compute gaussian filter coefficients */
267 dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
268 }
269
/*
 * Hand a register write for channel @ch to the context loader. @ofs is
 * relative to the channel base.
 */
static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
{
	dcss_ctxld_write(ch->scl->ctxld, ch->scl->ctx_id, val,
			 ch->base_ofs + ofs);
}
276
dcss_scaler_ch_init_all(struct dcss_scaler * scl,unsigned long scaler_base)277 static int dcss_scaler_ch_init_all(struct dcss_scaler *scl,
278 unsigned long scaler_base)
279 {
280 struct dcss_scaler_ch *ch;
281 int i;
282
283 for (i = 0; i < 3; i++) {
284 ch = &scl->ch[i];
285
286 ch->base_ofs = scaler_base + i * 0x400;
287
288 ch->base_reg = ioremap(ch->base_ofs, SZ_4K);
289 if (!ch->base_reg) {
290 dev_err(scl->dev, "scaler: unable to remap ch base\n");
291 return -ENOMEM;
292 }
293
294 ch->scl = scl;
295 }
296
297 return 0;
298 }
299
dcss_scaler_init(struct dcss_dev * dcss,unsigned long scaler_base)300 int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base)
301 {
302 struct dcss_scaler *scaler;
303
304 scaler = kzalloc(sizeof(*scaler), GFP_KERNEL);
305 if (!scaler)
306 return -ENOMEM;
307
308 dcss->scaler = scaler;
309 scaler->dev = dcss->dev;
310 scaler->ctxld = dcss->ctxld;
311 scaler->ctx_id = CTX_SB_HP;
312
313 if (dcss_scaler_ch_init_all(scaler, scaler_base)) {
314 int i;
315
316 for (i = 0; i < 3; i++) {
317 if (scaler->ch[i].base_reg)
318 iounmap(scaler->ch[i].base_reg);
319 }
320
321 kfree(scaler);
322
323 return -ENOMEM;
324 }
325
326 return 0;
327 }
328
dcss_scaler_exit(struct dcss_scaler * scl)329 void dcss_scaler_exit(struct dcss_scaler *scl)
330 {
331 int ch_no;
332
333 for (ch_no = 0; ch_no < 3; ch_no++) {
334 struct dcss_scaler_ch *ch = &scl->ch[ch_no];
335
336 dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL);
337
338 if (ch->base_reg)
339 iounmap(ch->base_reg);
340 }
341
342 kfree(scl);
343 }
344
dcss_scaler_ch_enable(struct dcss_scaler * scl,int ch_num,bool en)345 void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en)
346 {
347 struct dcss_scaler_ch *ch = &scl->ch[ch_num];
348 u32 scaler_ctrl;
349
350 scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0;
351
352 if (en)
353 dcss_scaler_write(ch, ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL);
354
355 if (ch->scaler_ctrl != scaler_ctrl)
356 ch->scaler_ctrl_chgd = true;
357
358 ch->scaler_ctrl = scaler_ctrl;
359 }
360
dcss_scaler_yuv_enable(struct dcss_scaler_ch * ch,bool en)361 static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en)
362 {
363 ch->sdata_ctrl &= ~YUV_EN;
364 ch->sdata_ctrl |= en ? YUV_EN : 0;
365 }
366
dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch * ch,bool en)367 static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en)
368 {
369 ch->sdata_ctrl &= ~RTRAM_8LINES;
370 ch->sdata_ctrl |= en ? RTRAM_8LINES : 0;
371 }
372
dcss_scaler_bit_depth_set(struct dcss_scaler_ch * ch,int depth)373 static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth)
374 {
375 u32 val;
376
377 val = depth == 30 ? 2 : 0;
378
379 dcss_scaler_write(ch,
380 ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) |
381 ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK),
382 DCSS_SCALER_BIT_DEPTH);
383 }
384
/*
 * Buffer layouts as understood by the scaler. The numeric values are written
 * as-is to DCSS_SCALER_SRC_FORMAT / DCSS_SCALER_DST_FORMAT.
 */
enum buffer_format {
	BUF_FMT_YUV420,
	BUF_FMT_YUV422,
	/* shared by RGB buffers and non-subsampled YUV */
	BUF_FMT_ARGB8888_YUV444,
};
390
/*
 * Position of the chroma samples of a 4:2:0 source. Each name encodes the
 * horizontal and vertical offset; dcss_scaler_fractions_set() turns these
 * offsets into chroma start phase adjustments.
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4 = 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4 = 1,
	PSC_LOC_HORZ_0_VERT_0 = 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0 = 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2 = 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2 = 5
};
399
dcss_scaler_format_set(struct dcss_scaler_ch * ch,enum buffer_format src_fmt,enum buffer_format dst_fmt)400 static void dcss_scaler_format_set(struct dcss_scaler_ch *ch,
401 enum buffer_format src_fmt,
402 enum buffer_format dst_fmt)
403 {
404 dcss_scaler_write(ch, src_fmt, DCSS_SCALER_SRC_FORMAT);
405 dcss_scaler_write(ch, dst_fmt, DCSS_SCALER_DST_FORMAT);
406 }
407
/* Pack (height - 1) and (width - 1) into the layout of a resolution register. */
static u32 dcss_scaler_res_pack(u32 xres, u32 yres)
{
	return (((yres - 1) << HEIGHT_POS) & HEIGHT_MASK) |
	       (((xres - 1) << WIDTH_POS) & WIDTH_MASK);
}

/*
 * Program the luma and chroma resolutions of source and destination.
 *
 * The chroma plane is half as wide for the packed 4:2:2 formats and half as
 * wide and half as high for NV12/NV21. The destination chroma width is
 * halved for a 4:2:2 destination.
 */
static void dcss_scaler_res_set(struct dcss_scaler_ch *ch,
				int src_xres, int src_yres,
				int dst_xres, int dst_yres,
				u32 pix_format, enum buffer_format dst_format)
{
	const bool src_is_422 = pix_format == DRM_FORMAT_UYVY ||
				pix_format == DRM_FORMAT_VYUY ||
				pix_format == DRM_FORMAT_YUYV ||
				pix_format == DRM_FORMAT_YVYU;
	const bool src_is_420 = pix_format == DRM_FORMAT_NV12 ||
				pix_format == DRM_FORMAT_NV21;
	u32 luma_src_h = src_yres;
	u32 chroma_src_w = src_xres;
	u32 chroma_src_h = src_yres;
	u32 chroma_dst_w = dst_xres;

	if (src_is_422 || src_is_420)
		chroma_src_w >>= 1;
	if (src_is_420)
		chroma_src_h >>= 1;

	if (dst_format == BUF_FMT_YUV422) {
		chroma_dst_w >>= 1;

		/* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */
		if (!src_is_422 && !src_is_420) {
			luma_src_h--;
			chroma_src_h--;
		}
	}

	dcss_scaler_write(ch, dcss_scaler_res_pack(src_xres, luma_src_h),
			  DCSS_SCALER_SRC_LUM_RES);
	dcss_scaler_write(ch, dcss_scaler_res_pack(chroma_src_w, chroma_src_h),
			  DCSS_SCALER_SRC_CHR_RES);
	dcss_scaler_write(ch, dcss_scaler_res_pack(dst_xres, dst_yres),
			  DCSS_SCALER_DST_LUM_RES);
	dcss_scaler_write(ch, dcss_scaler_res_pack(chroma_dst_w, dst_yres),
			  DCSS_SCALER_DST_CHR_RES);
}
459
/*
 * Convert an integer scaling factor to a fixed-point ratio with fp_pos
 * fractional bits: downscale_fp() yields factor, upscale_fp() yields
 * 1 / factor.
 */
#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

/* Largest supported downscale and upscale factor of a channel. */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* indexed by channel number, see dcss_scaler_get_min_max_ratios() */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{3, 8}, {5, 8}, {5, 8},
};
471
/* Ratio src / dst with 13 fractional bits; dst / 2 is added for rounding. */
static u32 dcss_scaler_phase_inc(int src, int dst)
{
	return ((src << 13) + (dst >> 1)) / dst;
}

/*
 * Program start phases and phase increments for luma and chroma.
 *
 * Luma always starts at phase 0. For a 4:2:0 source the chroma start phase
 * is moved back by 1/4 or 1/2 of a chroma sample spacing, depending on
 * @src_chroma_loc; the resulting values wrap around as unsigned numbers.
 * They are also saved in @ch because the coefficient setup needs to know
 * whether they are zero.
 */
static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch,
				      int src_xres, int src_yres,
				      int dst_xres, int dst_yres,
				      u32 src_format, u32 dst_format,
				      enum chroma_location src_chroma_loc)
{
	const u32 quarter = 1 << (PSC_PHASE_FRACTION_BITS - 2);
	const u32 half = 1 << (PSC_PHASE_FRACTION_BITS - 1);
	int chroma_src_w = src_xres;
	int chroma_src_h = src_yres;
	int chroma_dst_w = dst_xres;
	u32 luma_vinc, luma_hinc, chroma_vinc, chroma_hinc;
	u32 v_start = 0;
	u32 h_start = 0;

	if (src_format == BUF_FMT_YUV420) {
		/* move chroma up/left to line up with the first luma sample */
		switch (src_chroma_loc) {
		case PSC_LOC_HORZ_0_VERT_1_OVER_4:
			v_start -= quarter;
			break;
		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4:
			v_start -= quarter;
			h_start -= quarter;
			break;
		case PSC_LOC_HORZ_1_OVER_4_VERT_0:
			h_start -= quarter;
			break;
		case PSC_LOC_HORZ_0_VERT_1_OVER_2:
			v_start -= half;
			break;
		case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2:
			v_start -= half;
			h_start -= quarter;
			break;
		default:
			break;
		}

		/* chroma is subsampled in both directions */
		chroma_src_w >>= 1;
		chroma_src_h >>= 1;
	} else if (src_format == BUF_FMT_YUV422) {
		/* chroma is subsampled horizontally only */
		chroma_src_w >>= 1;
	}

	if (dst_format == BUF_FMT_YUV422)
		chroma_dst_w >>= 1;

	luma_vinc = dcss_scaler_phase_inc(src_yres, dst_yres);
	chroma_vinc = dcss_scaler_phase_inc(chroma_src_h, dst_yres);
	luma_hinc = dcss_scaler_phase_inc(src_xres, dst_xres);
	chroma_hinc = dcss_scaler_phase_inc(chroma_src_w, chroma_dst_w);

	/* save chroma start phase */
	ch->c_vstart = v_start;
	ch->c_hstart = h_start;

	dcss_scaler_write(ch, 0, DCSS_SCALER_V_LUM_START);
	dcss_scaler_write(ch, luma_vinc, DCSS_SCALER_V_LUM_INC);

	dcss_scaler_write(ch, 0, DCSS_SCALER_H_LUM_START);
	dcss_scaler_write(ch, luma_hinc, DCSS_SCALER_H_LUM_INC);

	dcss_scaler_write(ch, v_start, DCSS_SCALER_V_CHR_START);
	dcss_scaler_write(ch, chroma_vinc, DCSS_SCALER_V_CHR_INC);

	dcss_scaler_write(ch, h_start, DCSS_SCALER_H_CHR_START);
	dcss_scaler_write(ch, chroma_hinc, DCSS_SCALER_H_CHR_INC);
}
558
dcss_scaler_get_min_max_ratios(struct dcss_scaler * scl,int ch_num,int * min,int * max)559 int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num,
560 int *min, int *max)
561 {
562 *min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16);
563 *max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16);
564
565 return 0;
566 }
567
dcss_scaler_program_5_coef_set(struct dcss_scaler_ch * ch,int base_addr,int coef[][PSC_NUM_TAPS])568 static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch,
569 int base_addr,
570 int coef[][PSC_NUM_TAPS])
571 {
572 int i, phase;
573
574 for (i = 0; i < PSC_STORED_PHASES; i++) {
575 dcss_scaler_write(ch, ((coef[i][1] & 0xfff) << 16 |
576 (coef[i][2] & 0xfff) << 4 |
577 (coef[i][3] & 0xf00) >> 8),
578 base_addr + i * sizeof(u32));
579 dcss_scaler_write(ch, ((coef[i][3] & 0x0ff) << 20 |
580 (coef[i][4] & 0xfff) << 8 |
581 (coef[i][5] & 0xff0) >> 4),
582 base_addr + 0x40 + i * sizeof(u32));
583 dcss_scaler_write(ch, ((coef[i][5] & 0x00f) << 24),
584 base_addr + 0x80 + i * sizeof(u32));
585 }
586
587 /* reverse both phase and tap orderings */
588 for (phase = (PSC_NUM_PHASES >> 1) - 1;
589 i < PSC_NUM_PHASES; i++, phase--) {
590 dcss_scaler_write(ch, ((coef[phase][5] & 0xfff) << 16 |
591 (coef[phase][4] & 0xfff) << 4 |
592 (coef[phase][3] & 0xf00) >> 8),
593 base_addr + i * sizeof(u32));
594 dcss_scaler_write(ch, ((coef[phase][3] & 0x0ff) << 20 |
595 (coef[phase][2] & 0xfff) << 8 |
596 (coef[phase][1] & 0xff0) >> 4),
597 base_addr + 0x40 + i * sizeof(u32));
598 dcss_scaler_write(ch, ((coef[phase][1] & 0x00f) << 24),
599 base_addr + 0x80 + i * sizeof(u32));
600 }
601 }
602
dcss_scaler_program_7_coef_set(struct dcss_scaler_ch * ch,int base_addr,int coef[][PSC_NUM_TAPS])603 static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch,
604 int base_addr,
605 int coef[][PSC_NUM_TAPS])
606 {
607 int i, phase;
608
609 for (i = 0; i < PSC_STORED_PHASES; i++) {
610 dcss_scaler_write(ch, ((coef[i][0] & 0xfff) << 16 |
611 (coef[i][1] & 0xfff) << 4 |
612 (coef[i][2] & 0xf00) >> 8),
613 base_addr + i * sizeof(u32));
614 dcss_scaler_write(ch, ((coef[i][2] & 0x0ff) << 20 |
615 (coef[i][3] & 0xfff) << 8 |
616 (coef[i][4] & 0xff0) >> 4),
617 base_addr + 0x40 + i * sizeof(u32));
618 dcss_scaler_write(ch, ((coef[i][4] & 0x00f) << 24 |
619 (coef[i][5] & 0xfff) << 12 |
620 (coef[i][6] & 0xfff)),
621 base_addr + 0x80 + i * sizeof(u32));
622 }
623
624 /* reverse both phase and tap orderings */
625 for (phase = (PSC_NUM_PHASES >> 1) - 1;
626 i < PSC_NUM_PHASES; i++, phase--) {
627 dcss_scaler_write(ch, ((coef[phase][6] & 0xfff) << 16 |
628 (coef[phase][5] & 0xfff) << 4 |
629 (coef[phase][4] & 0xf00) >> 8),
630 base_addr + i * sizeof(u32));
631 dcss_scaler_write(ch, ((coef[phase][4] & 0x0ff) << 20 |
632 (coef[phase][3] & 0xfff) << 8 |
633 (coef[phase][2] & 0xff0) >> 4),
634 base_addr + 0x40 + i * sizeof(u32));
635 dcss_scaler_write(ch, ((coef[phase][2] & 0x00f) << 24 |
636 (coef[phase][1] & 0xfff) << 12 |
637 (coef[phase][0] & 0xfff)),
638 base_addr + 0x80 + i * sizeof(u32));
639 }
640 }
641
dcss_scaler_yuv_coef_set(struct dcss_scaler_ch * ch,enum buffer_format src_format,enum buffer_format dst_format,bool use_5_taps,int src_xres,int src_yres,int dst_xres,int dst_yres)642 static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
643 enum buffer_format src_format,
644 enum buffer_format dst_format,
645 bool use_5_taps,
646 int src_xres, int src_yres, int dst_xres,
647 int dst_yres)
648 {
649 int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
650 bool program_5_taps = use_5_taps ||
651 (dst_format == BUF_FMT_YUV422 &&
652 src_format == BUF_FMT_ARGB8888_YUV444);
653
654 /* horizontal luma */
655 dcss_scaler_filter_design(src_xres, dst_xres, false,
656 src_xres == dst_xres, coef);
657 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
658
659 /* vertical luma */
660 dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
661 src_yres == dst_yres, coef);
662
663 if (program_5_taps)
664 dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
665 else
666 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
667
668 /* adjust chroma resolution */
669 if (src_format != BUF_FMT_ARGB8888_YUV444)
670 src_xres >>= 1;
671 if (src_format == BUF_FMT_YUV420)
672 src_yres >>= 1;
673 if (dst_format != BUF_FMT_ARGB8888_YUV444)
674 dst_xres >>= 1;
675 if (dst_format == BUF_FMT_YUV420) /* should not happen */
676 dst_yres >>= 1;
677
678 /* horizontal chroma */
679 dcss_scaler_filter_design(src_xres, dst_xres, false,
680 (src_xres == dst_xres) && (ch->c_hstart == 0),
681 coef);
682
683 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
684
685 /* vertical chroma */
686 dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
687 (src_yres == dst_yres) && (ch->c_vstart == 0),
688 coef);
689 if (program_5_taps)
690 dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
691 else
692 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
693 }
694
dcss_scaler_rgb_coef_set(struct dcss_scaler_ch * ch,int src_xres,int src_yres,int dst_xres,int dst_yres)695 static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
696 int src_xres, int src_yres, int dst_xres,
697 int dst_yres)
698 {
699 int coef[PSC_STORED_PHASES][PSC_NUM_TAPS];
700
701 /* horizontal RGB */
702 dcss_scaler_filter_design(src_xres, dst_xres, false,
703 src_xres == dst_xres, coef);
704 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
705
706 /* vertical RGB */
707 dcss_scaler_filter_design(src_yres, dst_yres, false,
708 src_yres == dst_yres, coef);
709 dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
710 }
711
dcss_scaler_set_rgb10_order(struct dcss_scaler_ch * ch,const struct drm_format_info * format)712 static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
713 const struct drm_format_info *format)
714 {
715 u32 a2r10g10b10_format;
716
717 if (format->is_yuv)
718 return;
719
720 ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK;
721
722 if (format->depth != 30)
723 return;
724
725 switch (format->format) {
726 case DRM_FORMAT_ARGB2101010:
727 case DRM_FORMAT_XRGB2101010:
728 a2r10g10b10_format = 0;
729 break;
730
731 case DRM_FORMAT_ABGR2101010:
732 case DRM_FORMAT_XBGR2101010:
733 a2r10g10b10_format = 5;
734 break;
735
736 case DRM_FORMAT_RGBA1010102:
737 case DRM_FORMAT_RGBX1010102:
738 a2r10g10b10_format = 6;
739 break;
740
741 case DRM_FORMAT_BGRA1010102:
742 case DRM_FORMAT_BGRX1010102:
743 a2r10g10b10_format = 11;
744 break;
745
746 default:
747 a2r10g10b10_format = 0;
748 break;
749 }
750
751 ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
752 }
753
/*
 * Configure channel @ch_num to scale a @src_xres x @src_yres buffer of the
 * given @format to @dst_xres x @dst_yres.
 *
 * The destination format is always BUF_FMT_ARGB8888_YUV444. NV12/NV21
 * sources are treated as 4:2:0 with RTRAM_8LINES set, the packed
 * UYVY/VYUY/YUYV/YVYU sources as 4:2:2, everything else as 4:4:4. YUV
 * sources other than NV12/NV21 use the 5 tap vertical filter.
 * @vrefresh_hz is not used.
 */
void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
		       const struct drm_format_info *format,
		       int src_xres, int src_yres, int dst_xres, int dst_yres,
		       u32 vrefresh_hz)
{
	struct dcss_scaler_ch *ch = &scl->ch[ch_num];
	const enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444;
	enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444;
	const u32 fourcc = format->format;
	unsigned int depth = 0;
	bool eight_lines = false;
	bool five_taps = false;

	if (format->is_yuv) {
		dcss_scaler_yuv_enable(ch, true);

		switch (fourcc) {
		case DRM_FORMAT_NV12:
		case DRM_FORMAT_NV21:
			eight_lines = true;
			src_format = BUF_FMT_YUV420;
			break;
		case DRM_FORMAT_UYVY:
		case DRM_FORMAT_VYUY:
		case DRM_FORMAT_YUYV:
		case DRM_FORMAT_YVYU:
			src_format = BUF_FMT_YUV422;
			break;
		default:
			break;
		}

		five_taps = !eight_lines;
	} else {
		dcss_scaler_yuv_enable(ch, false);

		depth = format->depth;
	}

	dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres,
				  dst_yres, src_format, dst_format,
				  PSC_LOC_HORZ_0_VERT_1_OVER_4);

	if (format->is_yuv)
		dcss_scaler_yuv_coef_set(ch, src_format, dst_format,
					 five_taps, src_xres, src_yres,
					 dst_xres, dst_yres);
	else
		dcss_scaler_rgb_coef_set(ch, src_xres, src_yres,
					 dst_xres, dst_yres);

	dcss_scaler_rtr_8lines_enable(ch, eight_lines);
	dcss_scaler_bit_depth_set(ch, depth);
	dcss_scaler_set_rgb10_order(ch, format);
	dcss_scaler_format_set(ch, src_format, dst_format);
	dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres,
			    fourcc, dst_format);
}
807
808 /* This function will be called from interrupt context. */
dcss_scaler_write_sclctrl(struct dcss_scaler * scl)809 void dcss_scaler_write_sclctrl(struct dcss_scaler *scl)
810 {
811 int chnum;
812
813 dcss_ctxld_assert_locked(scl->ctxld);
814
815 for (chnum = 0; chnum < 3; chnum++) {
816 struct dcss_scaler_ch *ch = &scl->ch[chnum];
817
818 if (ch->scaler_ctrl_chgd) {
819 dcss_ctxld_write_irqsafe(scl->ctxld, scl->ctx_id,
820 ch->scaler_ctrl,
821 ch->base_ofs +
822 DCSS_SCALER_CTRL);
823 ch->scaler_ctrl_chgd = false;
824 }
825 }
826 }
827