1
2 /*
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26 #include <drm/drm_dsc.h>
27
28 #include "os_types.h"
29 #include "rc_calc.h"
30 #include "qp_tables.h"
31
/*
 * Pack a (mode, bpc, max/min selector) triple into one integer so it can be
 * used as a switch key. Every argument is parenthesized so that expression
 * arguments (e.g. "a | b") are shifted as a whole.
 */
#define table_hash(mode, bpc, max_min) (((mode) << 16) | ((bpc) << 8) | (max_min))

/*
 * Select one of three values depending on the colour mode.
 *
 * NOTE: this macro reads a variable named "cm" from the scope it is expanded
 * in; it is not passed as an argument. CM_444 and CM_RGB both pick @val444,
 * CM_422 picks @val422 and everything else picks @val420. The whole expansion
 * is parenthesized so it can be embedded in a larger expression safely.
 */
#define MODE_SELECT(val444, val422, val420) \
	((cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)))


/*
 * Emit one "case" of the table-selection switch: it points the local "table"
 * at qp_table_<mode>_<bpc>bpc_<max> and stores its element count in the local
 * "table_size". Both locals must exist at the expansion site. The expansion
 * ends in "break" without a semicolon; the user supplies it.
 */
#define TABLE_CASE(mode, bpc, max)   case (table_hash(mode, BPC_##bpc, max)): \
	table = qp_table_##mode##_##bpc##bpc_##max; \
	table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
	break
42
43
/*
 * get_qp_set - copy the QP set matching a colour mode / bpc / bpp into @qps
 * @qps:     destination set, overwritten only on success
 * @cm:      colour mode; CM_444 and CM_RGB share the 444 tables
 * @bpc:     bits per component selector
 * @max_min: MM_MAX or MM_MIN, selects the max-QP or min-QP table
 * @bpp:     target bits per pixel used to pick the row
 *
 * The row index is computed as (bpp - first_row_bpp) * 2, truncated to int.
 * If no table matches, or the computed row lies outside the table (below the
 * first row or past the last one), @qps is left untouched.
 */
static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
		       enum max_min max_min, float bpp)
{
	int mode = MODE_SELECT(444, 422, 420);
	int sel = table_hash(mode, bpc, max_min);
	int table_size = 0;
	int index;
	const struct qp_entry *table = NULL;

	// alias enum so TABLE_CASE() can paste "max"/"min" into the table names
	enum { min = MM_MIN, max = MM_MAX };
	switch (sel) {
		TABLE_CASE(444,  8, max);
		TABLE_CASE(444,  8, min);
		TABLE_CASE(444, 10, max);
		TABLE_CASE(444, 10, min);
		TABLE_CASE(444, 12, max);
		TABLE_CASE(444, 12, min);
		TABLE_CASE(422,  8, max);
		TABLE_CASE(422,  8, min);
		TABLE_CASE(422, 10, max);
		TABLE_CASE(422, 10, min);
		TABLE_CASE(422, 12, max);
		TABLE_CASE(422, 12, min);
		TABLE_CASE(420,  8, max);
		TABLE_CASE(420,  8, min);
		TABLE_CASE(420, 10, max);
		TABLE_CASE(420, 10, min);
		TABLE_CASE(420, 12, max);
		TABLE_CASE(420, 12, min);
	default:
		/* unknown combination: table stays NULL and we bail out below */
		break;
	}

	if (!table)
		return;

	index = (bpp - table[0].bpp) * 2;

	/* requested bpp is below the first row: would read before the table */
	if (index < 0) {
		dm_error("ERROR: Requested rc_calc to find a bpp entry that is below the table range\n");
		return;
	}

	/* requested size is bigger than the table */
	if (index >= table_size) {
		dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
		return;
	}

	memcpy(qps, table[index].qps, sizeof(qp_set));
}
89
/*
 * dsc_roundf - round to the nearest integer, halves away from zero
 * @num: value to round
 *
 * Adds or subtracts 0.5 depending on the sign and truncates through an int
 * cast, so the result is only meaningful while it fits in an int.
 */
static double dsc_roundf(double num)
{
	const double half = (num < 0.0) ? -0.5 : 0.5;

	return (int)(num + half);
}
99
/*
 * dsc_ceil - smallest integer not less than @num
 * @num: value to round up
 *
 * Works through int casts: positive non-integers are bumped by one before
 * truncation, everything else is simply truncated (which already rounds
 * negative values towards zero, i.e. up). Only valid while the result fits
 * in an int.
 */
static double dsc_ceil(double num)
{
	const int truncated = (int)num;

	if (num > 0 && (double)truncated != num)
		return (int)(num + 1);

	return truncated;
}
109
/*
 * One offset entry with a single linear segment: @at_lo for bpp <= @lo,
 * @at_hi for bpp >= @hi, and in between @at_lo plus the rounded distance from
 * @lo scaled by @slope.
 */
static int ofs_single_ramp(float bpp, int lo, int hi, int at_lo, int at_hi,
			   double slope)
{
	if (bpp <= lo)
		return at_lo;
	if (bpp >= hi)
		return at_hi;

	return at_lo + dsc_roundf((bpp - lo) * slope);
}

/*
 * One 4:4:4/RGB offset entry with two linear segments: flat at @at_6 up to
 * 6 bpp, ramp with slope 1 to 8 bpp, flat at @at_8_to_12 up to 12 bpp, ramp
 * with @upper_slope to 15 bpp, flat at @at_15 from there on.
 */
static int ofs_double_ramp_444(float bpp, int at_6, int at_8_to_12, int at_15,
			       double upper_slope)
{
	if (bpp <= 6)
		return at_6;
	if (bpp >= 8 && bpp <= 12)
		return at_8_to_12;
	if (bpp >= 15)
		return at_15;
	if (bpp > 6 && bpp < 8)
		return at_6 + dsc_roundf((bpp - 6) * (2 / 2.0));

	return at_8_to_12 + dsc_roundf((bpp - 12) * upper_slope);
}

/*
 * get_ofs_set - fill the 15-entry offset set for a colour mode and bpp
 * @ofs:  destination set; entries 0..14 are all written
 * @mode: colour mode; CM_444 and CM_RGB share one set of curves, CM_422 has
 *        its own, any other value uses the 4:2:0 curves
 * @bpp:  target bits per pixel
 *
 * Each entry is a piecewise-linear function of @bpp, rounded with
 * dsc_roundf(). Entries 10 and 12..14 are constants in every mode.
 */
static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
	/*
	 * Entries 0..9 for the subsampled modes. 4:2:2 and 4:2:0 use the same
	 * plateaus and slopes; only the bpp window they ramp over differs.
	 */
	static const struct {
		int at_lo;
		int at_hi;
		double slope;
	} subsampled[10] = {
		{   2, 10, 8 / 2.0 },
		{   0,  8, 8 / 2.0 },
		{   0,  6, 6 / 2.0 },
		{  -2,  4, 6 / 2.0 },
		{  -4,  2, 6 / 2.0 },
		{  -6,  0, 6 / 2.0 },
		{  -8, -2, 6 / 2.0 },
		{  -8, -4, 4 / 2.0 },
		{  -8, -6, 2 / 2.0 },
		{ -10, -8, 2 / 2.0 },
	};
	int i;

	if (mode == CM_444 || mode == CM_RGB) {
		ofs[0] = ofs_double_ramp_444(bpp,  0,  2, 10, 8 / 3.0);
		ofs[1] = ofs_double_ramp_444(bpp, -2,  0,  8, 8 / 3.0);
		ofs[2] = ofs_double_ramp_444(bpp, -2,  0,  6, 6 / 3.0);
		ofs[3] = ofs_double_ramp_444(bpp, -4, -2,  4, 6 / 3.0);
		ofs[4] = ofs_double_ramp_444(bpp, -6, -4,  2, 6 / 3.0);

		ofs[5] = ofs_single_ramp(bpp, 12, 15,  -6,  0, 6 / 3.0);
		ofs[6] = ofs_single_ramp(bpp, 12, 15,  -8, -2, 6 / 3.0);
		ofs[7] = ofs_single_ramp(bpp, 12, 15,  -8, -4, 4 / 3.0);
		ofs[8] = ofs_single_ramp(bpp, 12, 15,  -8, -6, 2 / 3.0);
		ofs[9] = ofs_single_ramp(bpp, 12, 15, -10, -8, 2 / 3.0);

		ofs[11] = ofs_single_ramp(bpp, 6, 8, -12, -10, 2 / 2.0);
	} else {
		/* 4:2:2 ramps over 8..10 bpp, 4:2:0 over 6..8 bpp */
		const int window_lo = (mode == CM_422) ? 8 : 6;

		for (i = 0; i < 10; i++)
			ofs[i] = ofs_single_ramp(bpp, window_lo, window_lo + 2,
						 subsampled[i].at_lo,
						 subsampled[i].at_hi,
						 subsampled[i].slope);

		if (mode == CM_422)
			ofs[11] = ofs_single_ramp(bpp, 6, 7, -12, -10, 2.0 / 1);
		else
			ofs[11] = ofs_single_ramp(bpp, 4, 5, -12, -10, 2 / 1.0);
	}

	/* entries that do not depend on mode or bpp */
	ofs[10] = -10;
	ofs[12] = -12;
	ofs[13] = -12;
	ofs[14] = -12;
}
164
/*
 * median3 - middle value of three integers
 * @a: first value
 * @b: second value
 * @c: third value
 *
 * Equivalent to clamping @c into the range spanned by @a and @b. The previous
 * swap-based version swapped the wrong pair in its last step and returned the
 * largest value for inputs such as (2, 3, 1) or (0, 15, -3).
 *
 * Return: the median of @a, @b and @c.
 */
static int median3(int a, int b, int c)
{
	const int lo = (a < b) ? a : b;
	const int hi = (a < b) ? b : a;

	if (c < lo)
		return lo;
	if (c > hi)
		return hi;

	return c;
}
176
/*
 * _do_calc_rc_params - compute the rate-control parameter set
 * @rc:                   output structure, filled in field by field
 * @cm:                   colour mode
 * @bpc:                  bits-per-component selector
 * @drm_bpp:              bits per pixel in 1/16 units (divided by 16 here)
 * @is_navite_422_or_420: when true the converted bpp is halved, because in
 *                        the native modes bits_per_pixel is double the target
 * @slice_width:          slice width; halved locally for CM_422 / CM_420
 * @slice_height:         slice height, used for the first-line offset cap
 * @minor_version:        when 1 together with CM_444, the quantisation
 *                        limits, flatness QPs and QP tables are lowered by one
 *
 * Uses floating point throughout.
 */
static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
			       enum bits_per_comp bpc, u16 drm_bpp,
			       bool is_navite_422_or_420,
			       int slice_width, int slice_height,
			       int minor_version)
{
	static const int buf_thresh[] = {
		896, 1792, 2688, 3584, 4480, 5376, 6272,
		6720, 7168, 7616, 7744, 7872, 8000, 8064,
	};
	const bool full_chroma = (cm == CM_444 || cm == CM_RGB);
	const int qp_adjust = (minor_version == 1 && cm == CM_444) ? 1 : 0;
	float bpp = ((float)drm_bpp / 16.0);
	float bpp_group;
	float xmit_delay_divisor;
	int first_line_cap;
	int quant_incr_limit;
	int flat_min_qp;
	int flat_max_qp;
	int padding_pixels = 0;
	unsigned int k;
	int i;

	/* in native_422 or native_420 modes, the bits_per_pixel is double the
	 * target bpp (the latter is what calc_rc_params expects)
	 */
	if (is_navite_422_or_420)
		bpp /= 2.0;

	/* per-depth base values; anything that is not 8 or 10 bpc is treated as 12 */
	switch (bpc) {
	case BPC_8:
		quant_incr_limit = 11;
		flat_min_qp = 3;
		flat_max_qp = 12;
		break;
	case BPC_10:
		quant_incr_limit = 15;
		flat_min_qp = 7;
		flat_max_qp = 16;
		break;
	default:
		quant_incr_limit = 19;
		flat_min_qp = 11;
		flat_max_qp = 20;
		break;
	}

	rc->rc_quant_incr_limit0 = quant_incr_limit - qp_adjust;
	rc->rc_quant_incr_limit1 = quant_incr_limit - qp_adjust;

	if (full_chroma)
		bpp_group = bpp;
	else
		bpp_group = bpp * 2.0;

	/* upper candidate shared by every first_line_bpg_offset computation */
	first_line_cap = 12 + (int) (0.09 * min(34, slice_height - 8));

	switch (cm) {
	case CM_420:
		if (bpp >= 6)
			rc->initial_fullness_offset = 2048;
		else if (bpp <= 4)
			rc->initial_fullness_offset = 6144;
		else if ((bpp > 4) && (bpp <= 5))
			rc->initial_fullness_offset = 6144 - dsc_roundf((bpp - 4) * (512));
		else
			rc->initial_fullness_offset = 5632 - dsc_roundf((bpp - 5) * (3584));

		rc->first_line_bpg_offset = median3(0, first_line_cap,
						    (int)((3 * bpc * 3) - (3 * bpp_group)));
		rc->second_line_bpg_offset = median3(0, 12,
						     (int)((3 * bpc * 3) - (3 * bpp_group)));
		break;
	case CM_422:
		if (bpp >= 8)
			rc->initial_fullness_offset = 2048;
		else if (bpp <= 7)
			rc->initial_fullness_offset = 5632;
		else
			rc->initial_fullness_offset = 5632 - dsc_roundf((bpp - 7) * (3584));

		rc->first_line_bpg_offset = median3(0, first_line_cap,
						    (int)((3 * bpc * 4) - (3 * bpp_group)));
		rc->second_line_bpg_offset = 0;
		break;
	case CM_444:
	case CM_RGB:
		if (bpp >= 12)
			rc->initial_fullness_offset = 2048;
		else if (bpp <= 8)
			rc->initial_fullness_offset = 6144;
		else if ((bpp > 8) && (bpp <= 10))
			rc->initial_fullness_offset = 6144 - dsc_roundf((bpp - 8) * (512 / 2));
		else
			rc->initial_fullness_offset = 5632 - dsc_roundf((bpp - 10) * (3584 / 2));

		rc->first_line_bpg_offset = median3(0, first_line_cap,
						    (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group)));
		rc->second_line_bpg_offset = 0;
		break;
	}

	xmit_delay_divisor = full_chroma ? 1.0 : 2.0;
	rc->initial_xmit_delay = dsc_roundf(8192.0/2.0/bpp/xmit_delay_divisor);

	/* from here on slice_width is the halved width for the subsampled modes */
	if (cm == CM_422 || cm == CM_420)
		slice_width /= 2;

	if ((slice_width % 3) != 0)
		padding_pixels = (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width);

	if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422))) &&
	    (rc->initial_xmit_delay + padding_pixels) % 3 == 1)
		rc->initial_xmit_delay++;

	rc->flatness_min_qp = flat_min_qp - qp_adjust;
	rc->flatness_max_qp = flat_max_qp - qp_adjust;
	rc->flatness_det_thresh = 2 << (bpc - 8);

	get_qp_set(rc->qp_min, cm, bpc, MM_MIN, bpp);
	get_qp_set(rc->qp_max, cm, bpc, MM_MAX, bpp);
	if (cm == CM_444 && minor_version == 1) {
		/* lower every QP by one, but never below zero */
		for (i = 0; i < QP_SET_SIZE; ++i) {
			if (rc->qp_min[i] > 0)
				rc->qp_min[i] = rc->qp_min[i] - 1;
			else
				rc->qp_min[i] = 0;

			if (rc->qp_max[i] > 0)
				rc->qp_max[i] = rc->qp_max[i] - 1;
			else
				rc->qp_max[i] = 0;
		}
	}
	get_ofs_set(rc->ofs, cm, bpp);

	/* fixed parameters */
	rc->rc_model_size    = 8192;
	rc->rc_edge_factor   = 6;
	rc->rc_tgt_offset_hi = 3;
	rc->rc_tgt_offset_lo = 3;

	for (k = 0; k < sizeof(buf_thresh) / sizeof(buf_thresh[0]); k++)
		rc->rc_buf_thresh[k] = buf_thresh[k];
}
267
_do_bytes_per_pixel_calc(int slice_width,u16 drm_bpp,bool is_navite_422_or_420)268 static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp,
269 bool is_navite_422_or_420)
270 {
271 float bpp;
272 u32 bytes_per_pixel;
273 double d_bytes_per_pixel;
274
275 bpp = ((float)drm_bpp / 16.0);
276 d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
277 // TODO: Make sure the formula for calculating this is precise (ceiling
278 // vs. floor, and at what point they should be applied)
279 if (is_navite_422_or_420)
280 d_bytes_per_pixel /= 2;
281
282 bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
283
284 return bytes_per_pixel;
285 }
286
/*
 * _do_calc_dsc_bpp_x16 - target bpp in 1/16 units for a given bandwidth
 * @stream_bandwidth_kbps: stream bandwidth; multiplied by 10 to get 100 bps
 * @pix_clk_100hz:         pixel clock; used as a divisor
 * @bpp_increment_div:     number of steps per bpp (the precision); also
 *                         used as a divisor
 *
 * The bandwidth/clock ratio is truncated to a whole number of
 * 1/@bpp_increment_div steps and then rescaled to 1/16 units with integer
 * arithmetic. Uses floating point.
 */
static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
				u32 bpp_increment_div)
{
	const float bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
	const float target_bpp = bandwidth_100bps / pix_clk_100hz;
	// Round down to the nearest precision stop to bring it into DSC spec
	// range
	const u32 whole_steps = (u32)(target_bpp * bpp_increment_div);

	return (whole_steps * 16) / bpp_increment_div;
}
306
307 /**
308 * calc_rc_params - reads the user's cmdline mode
309 * @rc: DC internal DSC parameters
310 * @pps: DRM struct with all required DSC values
311 *
312 * This function expects a drm_dsc_config data struct with all the required DSC
313 * values previously filled out by our driver and based on this information it
314 * computes some of the DSC values.
315 *
316 * @note This calculation requires float point operation, most of it executes
317 * under kernel_fpu_{begin,end}.
318 */
calc_rc_params(struct rc_params * rc,const struct drm_dsc_config * pps)319 void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
320 {
321 enum colour_mode mode;
322 enum bits_per_comp bpc;
323 bool is_navite_422_or_420;
324 u16 drm_bpp = pps->bits_per_pixel;
325 int slice_width = pps->slice_width;
326 int slice_height = pps->slice_height;
327
328 mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 :
329 (pps->native_422 ? CM_422 :
330 pps->native_420 ? CM_420 : CM_444));
331 bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
332 ? BPC_10 : BPC_12;
333
334 is_navite_422_or_420 = pps->native_422 || pps->native_420;
335
336 DC_FP_START();
337 _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
338 slice_width, slice_height,
339 pps->dsc_version_minor);
340 DC_FP_END();
341 }
342
343 /**
344 * calc_dsc_bytes_per_pixel - calculate bytes per pixel
345 * @pps: DRM struct with all required DSC values
346 *
347 * Based on the information inside drm_dsc_config, this function calculates the
348 * total of bytes per pixel.
349 *
350 * @note This calculation requires float point operation, most of it executes
351 * under kernel_fpu_{begin,end}.
352 *
353 * Return:
354 * Return the number of bytes per pixel
355 */
calc_dsc_bytes_per_pixel(const struct drm_dsc_config * pps)356 u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
357
358 {
359 u32 ret;
360 u16 drm_bpp = pps->bits_per_pixel;
361 int slice_width = pps->slice_width;
362 bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
363
364 DC_FP_START();
365 ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
366 is_navite_422_or_420);
367 DC_FP_END();
368 return ret;
369 }
370
371 /**
372 * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
373 * @stream_bandwidth_kbps:
374 * @pix_clk_100hz:
375 * @bpp_increment_div:
376 *
377 * Calculate the total of bits per pixel for DSC configuration.
378 *
379 * @note This calculation requires float point operation, most of it executes
380 * under kernel_fpu_{begin,end}.
381 */
calc_dsc_bpp_x16(u32 stream_bandwidth_kbps,u32 pix_clk_100hz,u32 bpp_increment_div)382 u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
383 u32 bpp_increment_div)
384 {
385 u32 dsc_bpp;
386
387 DC_FP_START();
388 dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
389 bpp_increment_div);
390 DC_FP_END();
391 return dsc_bpp;
392 }
393