1 /*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/common.h"
22
23 #include "dcadct.h"
24 #include "dcamath.h"
25
sum_a(const int * input,int * output,int len)26 static void sum_a(const int *input, int *output, int len)
27 {
28 int i;
29
30 for (i = 0; i < len; i++)
31 output[i] = input[2 * i] + input[2 * i + 1];
32 }
33
sum_b(const int * input,int * output,int len)34 static void sum_b(const int *input, int *output, int len)
35 {
36 int i;
37
38 output[0] = input[0];
39 for (i = 1; i < len; i++)
40 output[i] = input[2 * i] + input[2 * i - 1];
41 }
42
sum_c(const int * input,int * output,int len)43 static void sum_c(const int *input, int *output, int len)
44 {
45 int i;
46
47 for (i = 0; i < len; i++)
48 output[i] = input[2 * i];
49 }
50
sum_d(const int * input,int * output,int len)51 static void sum_d(const int *input, int *output, int len)
52 {
53 int i;
54
55 output[0] = input[1];
56 for (i = 1; i < len; i++)
57 output[i] = input[2 * i - 1] + input[2 * i + 1];
58 }
59
dct_a(const int * input,int * output)60 static void dct_a(const int *input, int *output)
61 {
62 static const int cos_mod[8][8] = {
63 { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 },
64 { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
65 { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 },
66 { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 },
67 { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 },
68 { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 },
69 { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 },
70 { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 }
71 };
72
73 int i, j;
74
75 for (i = 0; i < 8; i++) {
76 int64_t res = 0;
77 for (j = 0; j < 8; j++)
78 res += (int64_t)cos_mod[i][j] * input[j];
79 output[i] = norm23(res);
80 }
81 }
82
dct_b(const int * input,int * output)83 static void dct_b(const int *input, int *output)
84 {
85 static const int cos_mod[8][7] = {
86 { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 },
87 { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
88 { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 },
89 { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 },
90 { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 },
91 { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 },
92 { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 },
93 { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 }
94 };
95
96 int i, j;
97
98 for (i = 0; i < 8; i++) {
99 int64_t res = input[0] * (INT64_C(1) << 23);
100 for (j = 0; j < 7; j++)
101 res += (int64_t)cos_mod[i][j] * input[1 + j];
102 output[i] = norm23(res);
103 }
104 }
105
mod_a(const int * input,int * output)106 static void mod_a(const int *input, int *output)
107 {
108 static const int cos_mod[16] = {
109 4199362, 4240198, 4323885, 4454708,
110 4639772, 4890013, 5221943, 5660703,
111 -6245623, -7040975, -8158494, -9809974,
112 -12450076, -17261920, -28585092, -85479984
113 };
114
115 int i, k;
116
117 for (i = 0; i < 8; i++)
118 output[i] = mul23(cos_mod[i], input[i] + input[8 + i]);
119
120 for (i = 8, k = 7; i < 16; i++, k--)
121 output[i] = mul23(cos_mod[i], input[k] - input[8 + k]);
122 }
123
mod_b(int * input,int * output)124 static void mod_b(int *input, int *output)
125 {
126 static const int cos_mod[8] = {
127 4214598, 4383036, 4755871, 5425934,
128 6611520, 8897610, 14448934, 42791536
129 };
130
131 int i, k;
132
133 for (i = 0; i < 8; i++)
134 input[8 + i] = mul23(cos_mod[i], input[8 + i]);
135
136 for (i = 0; i < 8; i++)
137 output[i] = input[i] + input[8 + i];
138
139 for (i = 8, k = 7; i < 16; i++, k--)
140 output[i] = input[k] - input[8 + k];
141 }
142
mod_c(const int * input,int * output)143 static void mod_c(const int *input, int *output)
144 {
145 static const int cos_mod[32] = {
146 1048892, 1051425, 1056522, 1064244,
147 1074689, 1087987, 1104313, 1123884,
148 1146975, 1173922, 1205139, 1241133,
149 1282529, 1330095, 1384791, 1447815,
150 -1520688, -1605358, -1704360, -1821051,
151 -1959964, -2127368, -2332183, -2587535,
152 -2913561, -3342802, -3931480, -4785806,
153 -6133390, -8566050, -14253820, -42727120
154 };
155
156 int i, k;
157
158 for (i = 0; i < 16; i++)
159 output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
160
161 for (i = 16, k = 15; i < 32; i++, k--)
162 output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
163 }
164
clp_v(int * input,int len)165 static void clp_v(int *input, int len)
166 {
167 int i;
168
169 for (i = 0; i < len; i++)
170 input[i] = clip23(input[i]);
171 }
172
imdct_half_32(int32_t * output,const int32_t * input)173 static void imdct_half_32(int32_t *output, const int32_t *input)
174 {
175 int buf_a[32], buf_b[32];
176 int i, k, mag, shift, round;
177
178 mag = 0;
179 for (i = 0; i < 32; i++)
180 mag += abs(input[i]);
181
182 shift = mag > 0x400000 ? 2 : 0;
183 round = shift > 0 ? 1 << (shift - 1) : 0;
184
185 for (i = 0; i < 32; i++)
186 buf_a[i] = (input[i] + round) >> shift;
187
188 sum_a(buf_a, buf_b + 0, 16);
189 sum_b(buf_a, buf_b + 16, 16);
190 clp_v(buf_b, 32);
191
192 sum_a(buf_b + 0, buf_a + 0, 8);
193 sum_b(buf_b + 0, buf_a + 8, 8);
194 sum_c(buf_b + 16, buf_a + 16, 8);
195 sum_d(buf_b + 16, buf_a + 24, 8);
196 clp_v(buf_a, 32);
197
198 dct_a(buf_a + 0, buf_b + 0);
199 dct_b(buf_a + 8, buf_b + 8);
200 dct_b(buf_a + 16, buf_b + 16);
201 dct_b(buf_a + 24, buf_b + 24);
202 clp_v(buf_b, 32);
203
204 mod_a(buf_b + 0, buf_a + 0);
205 mod_b(buf_b + 16, buf_a + 16);
206 clp_v(buf_a, 32);
207
208 mod_c(buf_a, buf_b);
209
210 for (i = 0; i < 32; i++)
211 buf_b[i] = clip23(buf_b[i] * (1 << shift));
212
213 for (i = 0, k = 31; i < 16; i++, k--) {
214 output[ i] = clip23(buf_b[i] - buf_b[k]);
215 output[16 + i] = clip23(buf_b[i] + buf_b[k]);
216 }
217 }
218
mod64_a(const int * input,int * output)219 static void mod64_a(const int *input, int *output)
220 {
221 static const int cos_mod[32] = {
222 4195568, 4205700, 4226086, 4256977,
223 4298755, 4351949, 4417251, 4495537,
224 4587901, 4695690, 4820557, 4964534,
225 5130115, 5320382, 5539164, 5791261,
226 -6082752, -6421430, -6817439, -7284203,
227 -7839855, -8509474, -9328732, -10350140,
228 -11654242, -13371208, -15725922, -19143224,
229 -24533560, -34264200, -57015280, -170908480
230 };
231
232 int i, k;
233
234 for (i = 0; i < 16; i++)
235 output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
236
237 for (i = 16, k = 15; i < 32; i++, k--)
238 output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
239 }
240
mod64_b(int * input,int * output)241 static void mod64_b(int *input, int *output)
242 {
243 static const int cos_mod[16] = {
244 4199362, 4240198, 4323885, 4454708,
245 4639772, 4890013, 5221943, 5660703,
246 6245623, 7040975, 8158494, 9809974,
247 12450076, 17261920, 28585092, 85479984
248 };
249
250 int i, k;
251
252 for (i = 0; i < 16; i++)
253 input[16 + i] = mul23(cos_mod[i], input[16 + i]);
254
255 for (i = 0; i < 16; i++)
256 output[i] = input[i] + input[16 + i];
257
258 for (i = 16, k = 15; i < 32; i++, k--)
259 output[i] = input[k] - input[16 + k];
260 }
261
mod64_c(const int * input,int * output)262 static void mod64_c(const int *input, int *output)
263 {
264 static const int cos_mod[64] = {
265 741511, 741958, 742853, 744199,
266 746001, 748262, 750992, 754197,
267 757888, 762077, 766777, 772003,
268 777772, 784105, 791021, 798546,
269 806707, 815532, 825054, 835311,
270 846342, 858193, 870912, 884554,
271 899181, 914860, 931667, 949686,
272 969011, 989747, 1012012, 1035941,
273 -1061684, -1089412, -1119320, -1151629,
274 -1186595, -1224511, -1265719, -1310613,
275 -1359657, -1413400, -1472490, -1537703,
276 -1609974, -1690442, -1780506, -1881904,
277 -1996824, -2128058, -2279225, -2455101,
278 -2662128, -2909200, -3208956, -3579983,
279 -4050785, -4667404, -5509372, -6726913,
280 -8641940, -12091426, -20144284, -60420720
281 };
282
283 int i, k;
284
285 for (i = 0; i < 32; i++)
286 output[i] = mul23(cos_mod[i], input[i] + input[32 + i]);
287
288 for (i = 32, k = 31; i < 64; i++, k--)
289 output[i] = mul23(cos_mod[i], input[k] - input[32 + k]);
290 }
291
imdct_half_64(int32_t * output,const int32_t * input)292 static void imdct_half_64(int32_t *output, const int32_t *input)
293 {
294 int buf_a[64], buf_b[64];
295 int i, k, mag, shift, round;
296
297 mag = 0;
298 for (i = 0; i < 64; i++)
299 mag += abs(input[i]);
300
301 shift = mag > 0x400000 ? 2 : 0;
302 round = shift > 0 ? 1 << (shift - 1) : 0;
303
304 for (i = 0; i < 64; i++)
305 buf_a[i] = (input[i] + round) >> shift;
306
307 sum_a(buf_a, buf_b + 0, 32);
308 sum_b(buf_a, buf_b + 32, 32);
309 clp_v(buf_b, 64);
310
311 sum_a(buf_b + 0, buf_a + 0, 16);
312 sum_b(buf_b + 0, buf_a + 16, 16);
313 sum_c(buf_b + 32, buf_a + 32, 16);
314 sum_d(buf_b + 32, buf_a + 48, 16);
315 clp_v(buf_a, 64);
316
317 sum_a(buf_a + 0, buf_b + 0, 8);
318 sum_b(buf_a + 0, buf_b + 8, 8);
319 sum_c(buf_a + 16, buf_b + 16, 8);
320 sum_d(buf_a + 16, buf_b + 24, 8);
321 sum_c(buf_a + 32, buf_b + 32, 8);
322 sum_d(buf_a + 32, buf_b + 40, 8);
323 sum_c(buf_a + 48, buf_b + 48, 8);
324 sum_d(buf_a + 48, buf_b + 56, 8);
325 clp_v(buf_b, 64);
326
327 dct_a(buf_b + 0, buf_a + 0);
328 dct_b(buf_b + 8, buf_a + 8);
329 dct_b(buf_b + 16, buf_a + 16);
330 dct_b(buf_b + 24, buf_a + 24);
331 dct_b(buf_b + 32, buf_a + 32);
332 dct_b(buf_b + 40, buf_a + 40);
333 dct_b(buf_b + 48, buf_a + 48);
334 dct_b(buf_b + 56, buf_a + 56);
335 clp_v(buf_a, 64);
336
337 mod_a(buf_a + 0, buf_b + 0);
338 mod_b(buf_a + 16, buf_b + 16);
339 mod_b(buf_a + 32, buf_b + 32);
340 mod_b(buf_a + 48, buf_b + 48);
341 clp_v(buf_b, 64);
342
343 mod64_a(buf_b + 0, buf_a + 0);
344 mod64_b(buf_b + 32, buf_a + 32);
345 clp_v(buf_a, 64);
346
347 mod64_c(buf_a, buf_b);
348
349 for (i = 0; i < 64; i++)
350 buf_b[i] = clip23(buf_b[i] * (1 << shift));
351
352 for (i = 0, k = 63; i < 32; i++, k--) {
353 output[ i] = clip23(buf_b[i] - buf_b[k]);
354 output[32 + i] = clip23(buf_b[i] + buf_b[k]);
355 }
356 }
357
ff_dcadct_init(DCADCTContext * c)358 av_cold void ff_dcadct_init(DCADCTContext *c)
359 {
360 c->imdct_half[0] = imdct_half_32;
361 c->imdct_half[1] = imdct_half_64;
362 }
363