1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12
13 #include "./vp9_rtcd.h"
14 #include "./vpx_dsp_rtcd.h"
15 #include "vp9/common/vp9_blockd.h"
16 #include "vp9/common/vp9_idct.h"
17 #include "vpx_dsp/inv_txfm.h"
18 #include "vpx_ports/mem.h"
19
vp9_iht4x4_16_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
21 int tx_type) {
22 const transform_2d IHT_4[] = {
23 { idct4_c, idct4_c }, // DCT_DCT = 0
24 { iadst4_c, idct4_c }, // ADST_DCT = 1
25 { idct4_c, iadst4_c }, // DCT_ADST = 2
26 { iadst4_c, iadst4_c } // ADST_ADST = 3
27 };
28
29 int i, j;
30 tran_low_t out[4 * 4];
31 tran_low_t *outptr = out;
32 tran_low_t temp_in[4], temp_out[4];
33
34 // inverse transform row vectors
35 for (i = 0; i < 4; ++i) {
36 IHT_4[tx_type].rows(input, outptr);
37 input += 4;
38 outptr += 4;
39 }
40
41 // inverse transform column vectors
42 for (i = 0; i < 4; ++i) {
43 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
44 IHT_4[tx_type].cols(temp_in, temp_out);
45 for (j = 0; j < 4; ++j) {
46 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
47 ROUND_POWER_OF_TWO(temp_out[j], 4));
48 }
49 }
50 }
51
52 static const transform_2d IHT_8[] = {
53 { idct8_c, idct8_c }, // DCT_DCT = 0
54 { iadst8_c, idct8_c }, // ADST_DCT = 1
55 { idct8_c, iadst8_c }, // DCT_ADST = 2
56 { iadst8_c, iadst8_c } // ADST_ADST = 3
57 };
58
vp9_iht8x8_64_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
60 int tx_type) {
61 int i, j;
62 tran_low_t out[8 * 8];
63 tran_low_t *outptr = out;
64 tran_low_t temp_in[8], temp_out[8];
65 const transform_2d ht = IHT_8[tx_type];
66
67 // inverse transform row vectors
68 for (i = 0; i < 8; ++i) {
69 ht.rows(input, outptr);
70 input += 8;
71 outptr += 8;
72 }
73
74 // inverse transform column vectors
75 for (i = 0; i < 8; ++i) {
76 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
77 ht.cols(temp_in, temp_out);
78 for (j = 0; j < 8; ++j) {
79 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
80 ROUND_POWER_OF_TWO(temp_out[j], 5));
81 }
82 }
83 }
84
85 static const transform_2d IHT_16[] = {
86 { idct16_c, idct16_c }, // DCT_DCT = 0
87 { iadst16_c, idct16_c }, // ADST_DCT = 1
88 { idct16_c, iadst16_c }, // DCT_ADST = 2
89 { iadst16_c, iadst16_c } // ADST_ADST = 3
90 };
91
vp9_iht16x16_256_add_c(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
93 int tx_type) {
94 int i, j;
95 tran_low_t out[16 * 16];
96 tran_low_t *outptr = out;
97 tran_low_t temp_in[16], temp_out[16];
98 const transform_2d ht = IHT_16[tx_type];
99
100 // Rows
101 for (i = 0; i < 16; ++i) {
102 ht.rows(input, outptr);
103 input += 16;
104 outptr += 16;
105 }
106
107 // Columns
108 for (i = 0; i < 16; ++i) {
109 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
110 ht.cols(temp_in, temp_out);
111 for (j = 0; j < 16; ++j) {
112 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
113 ROUND_POWER_OF_TWO(temp_out[j], 6));
114 }
115 }
116 }
117
118 // idct
// Picks the 4x4 inverse DCT routine from eob: the 16-coefficient version
// when eob > 1, otherwise the _1 version (presumably the DC-only shortcut,
// as the larger block sizes in this file describe it).
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_idct4x4_1_add(input, dest, stride);
    return;
  }
  vpx_idct4x4_16_add(input, dest, stride);
}
126
// Picks the 4x4 inverse WHT routine from eob: the 16-coefficient version
// when eob > 1, otherwise the _1 version.
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob <= 1) {
    vpx_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vpx_iwht4x4_16_add(input, dest, stride);
}
134
// Picks the cheapest 8x8 inverse DCT routine that eob allows; the work can
// be reduced when few DCT coefficients are non-zero.
//
//   eob == 1        -> DC-only routine
//   other eob <= 12 -> 12-coefficient routine (eob <= 0 also lands here; it
//                      is not special-cased)
//   otherwise       -> full 64-coefficient routine
//
// Note carried over from the original comment: if dc is 1, input[0] is the
// reconstructed value and needs no dequantization, and dc is counted in eob
// (so eob >= 1).
void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
                     int eob) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_idct8x8_1_add(input, dest, stride);
    return;
  }
  if (eob <= 12) {
    vpx_idct8x8_12_add(input, dest, stride);
    return;
  }
  vpx_idct8x8_64_add(input, dest, stride);
}
150
// Picks the cheapest 16x16 inverse DCT routine that eob allows; the work can
// be reduced when few DCT coefficients are non-zero.
//
//   eob == 1        -> DC-only routine
//   other eob <= 10 -> 10-coefficient routine
//   other eob <= 38 -> 38-coefficient routine
//   otherwise       -> full 256-coefficient routine
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_idct16x16_1_add(input, dest, stride);
    return;
  }
  if (eob <= 10) {
    vpx_idct16x16_10_add(input, dest, stride);
    return;
  }
  if (eob <= 38) {
    vpx_idct16x16_38_add(input, dest, stride);
    return;
  }
  vpx_idct16x16_256_add(input, dest, stride);
}
164
// Picks the cheapest 32x32 inverse DCT routine that eob allows.
//
//   eob == 1         -> _1 routine
//   other eob <= 34  -> non-zero coefficients only in the upper-left 8x8
//   other eob <= 135 -> non-zero coefficients only in the upper-left 16x16
//   otherwise        -> full 1024-coefficient routine
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
  if (eob == 1) {
    vpx_idct32x32_1_add(input, dest, stride);
    return;
  }
  if (eob <= 34) {
    // Non-zero coeff only in upper-left 8x8.
    vpx_idct32x32_34_add(input, dest, stride);
    return;
  }
  if (eob <= 135) {
    // Non-zero coeff only in upper-left 16x16.
    vpx_idct32x32_135_add(input, dest, stride);
    return;
  }
  vpx_idct32x32_1024_add(input, dest, stride);
}
178
179 // iht
// 4x4 inverse transform entry point: DCT_DCT goes through the eob-driven
// vp9_idct4x4_add, every other tx_type goes to the hybrid transform.
void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
187
// 8x8 inverse transform entry point: DCT_DCT goes through the eob-driven
// vp9_idct8x8_add, every other tx_type goes to the hybrid transform.
void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht8x8_64_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct8x8_add(input, dest, stride, eob);
}
196
// 16x16 inverse transform entry point: DCT_DCT goes through the eob-driven
// vp9_idct16x16_add, every other tx_type goes to the hybrid transform.
void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
                      int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht16x16_256_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct16x16_add(input, dest, stride, eob);
}
205
206 #if CONFIG_VP9_HIGHBITDEPTH
207
vp9_highbd_iht4x4_16_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)208 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
209 int stride, int tx_type, int bd) {
210 const highbd_transform_2d IHT_4[] = {
211 { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
212 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
213 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
214 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
215 };
216
217 int i, j;
218 tran_low_t out[4 * 4];
219 tran_low_t *outptr = out;
220 tran_low_t temp_in[4], temp_out[4];
221
222 // Inverse transform row vectors.
223 for (i = 0; i < 4; ++i) {
224 IHT_4[tx_type].rows(input, outptr, bd);
225 input += 4;
226 outptr += 4;
227 }
228
229 // Inverse transform column vectors.
230 for (i = 0; i < 4; ++i) {
231 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
232 IHT_4[tx_type].cols(temp_in, temp_out, bd);
233 for (j = 0; j < 4; ++j) {
234 dest[j * stride + i] = highbd_clip_pixel_add(
235 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
236 }
237 }
238 }
239
240 static const highbd_transform_2d HIGH_IHT_8[] = {
241 { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
242 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
243 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
244 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
245 };
246
vp9_highbd_iht8x8_64_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)247 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
248 int stride, int tx_type, int bd) {
249 int i, j;
250 tran_low_t out[8 * 8];
251 tran_low_t *outptr = out;
252 tran_low_t temp_in[8], temp_out[8];
253 const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
254
255 // Inverse transform row vectors.
256 for (i = 0; i < 8; ++i) {
257 ht.rows(input, outptr, bd);
258 input += 8;
259 outptr += 8;
260 }
261
262 // Inverse transform column vectors.
263 for (i = 0; i < 8; ++i) {
264 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
265 ht.cols(temp_in, temp_out, bd);
266 for (j = 0; j < 8; ++j) {
267 dest[j * stride + i] = highbd_clip_pixel_add(
268 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
269 }
270 }
271 }
272
273 static const highbd_transform_2d HIGH_IHT_16[] = {
274 { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
275 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
276 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
277 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
278 };
279
vp9_highbd_iht16x16_256_add_c(const tran_low_t * input,uint16_t * dest,int stride,int tx_type,int bd)280 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
281 int stride, int tx_type, int bd) {
282 int i, j;
283 tran_low_t out[16 * 16];
284 tran_low_t *outptr = out;
285 tran_low_t temp_in[16], temp_out[16];
286 const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
287
288 // Rows
289 for (i = 0; i < 16; ++i) {
290 ht.rows(input, outptr, bd);
291 input += 16;
292 outptr += 16;
293 }
294
295 // Columns
296 for (i = 0; i < 16; ++i) {
297 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
298 ht.cols(temp_in, temp_out, bd);
299 for (j = 0; j < 16; ++j) {
300 dest[j * stride + i] = highbd_clip_pixel_add(
301 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
302 }
303 }
304 }
305
306 // idct
// Picks the high-bitdepth 4x4 inverse DCT routine from eob: the
// 16-coefficient version when eob > 1, otherwise the _1 version. bd is
// passed through unchanged.
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
}
314
// Picks the high-bitdepth 4x4 inverse WHT routine from eob: the
// 16-coefficient version when eob > 1, otherwise the _1 version. bd is
// passed through unchanged.
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
}
322
// Picks the cheapest high-bitdepth 8x8 inverse DCT routine that eob allows;
// the work can be reduced when few DCT coefficients are non-zero.
//
//   eob == 1        -> DC-only routine
//   other eob <= 12 -> 12-coefficient routine (eob <= 0 also lands here; it
//                      is not special-cased)
//   otherwise       -> full 64-coefficient routine
//
// Note carried over from the original comment: if dc is 1, input[0] is the
// reconstructed value and needs no dequantization, and dc is counted in eob
// (so eob >= 1).
void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
                            int eob, int bd) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 12) {
    vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
339
// Picks the cheapest high-bitdepth 16x16 inverse DCT routine that eob
// allows; the work can be reduced when few DCT coefficients are non-zero.
//
//   eob == 1        -> DC-only routine
//   other eob <= 10 -> 10-coefficient routine
//   other eob <= 38 -> 38-coefficient routine
//   otherwise       -> full 256-coefficient routine
void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    // DC only DCT coefficient.
    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 10) {
    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 38) {
    vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
}
355
// Picks the cheapest high-bitdepth 32x32 inverse DCT routine that eob allows.
//
//   eob == 1         -> _1 routine
//   other eob <= 34  -> 34-coefficient routine
//   other eob <= 135 -> 135-coefficient routine
//   otherwise        -> full 1024-coefficient routine
void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
                              int stride, int eob, int bd) {
  if (eob == 1) {
    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 34) {
    // Non-zero coeff only in upper-left 8x8.
    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    return;
  }
  if (eob <= 135) {
    vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
    return;
  }
  vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
}
369
370 // iht
// High-bitdepth 4x4 inverse transform entry point: DCT_DCT goes through the
// eob-driven vp9_highbd_idct4x4_add, every other tx_type goes to the hybrid
// transform.
void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
}
378
// High-bitdepth 8x8 inverse transform entry point: DCT_DCT goes through the
// eob-driven vp9_highbd_idct8x8_add, every other tx_type goes to the hybrid
// transform.
void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
                           uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
}
387
// High-bitdepth 16x16 inverse transform entry point: DCT_DCT goes through
// the eob-driven vp9_highbd_idct16x16_add, every other tx_type goes to the
// hybrid transform.
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
                             uint16_t *dest, int stride, int eob, int bd) {
  if (tx_type != DCT_DCT) {
    vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    return;
  }
  vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
}
396 #endif // CONFIG_VP9_HIGHBITDEPTH
397