1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include "aom_dsp/txfm_common.h"
14 #include "config/aom_dsp_rtcd.h"
15
// Forward 4x4 DCT, C reference implementation.
//
//   input  - 4x4 block of samples; vertically adjacent samples are `stride`
//            elements apart.
//   output - receives the 16 coefficients, stored with a row pitch of 4.
//   stride - element distance between consecutive input rows.
//
// The 2D transform is two passes of the same 1D 4-point transform. Each pass
// reads one column of its source and writes the result as one row of its
// destination, so every pass also transposes. After two passes the
// coefficients are back in normal row/column order.
void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
  // First-pass results (transposed) waiting to be consumed by the second
  // pass.
  tran_low_t intermediate[4 * 4];

  for (int pass = 0; pass < 2; ++pass) {
    tran_low_t *const dst = (pass == 0) ? intermediate : output;

    for (int col = 0; col < 4; ++col) {
      tran_high_t v[4];

      // Gather the four samples of the current column.
      if (pass == 0) {
        // Source samples are scaled up by 16 on the way in.
        for (int k = 0; k < 4; ++k) {
          v[k] = input[col + k * stride] * 16;
        }
        // The first sample of the first column gets +1 when it is non-zero.
        if (col == 0 && v[0] != 0) {
          v[0] += 1;
        }
      } else {
        for (int k = 0; k < 4; ++k) {
          v[k] = intermediate[col + k * 4];
        }
      }

      // Butterfly stage.
      const tran_high_t sum03 = v[0] + v[3];
      const tran_high_t sum12 = v[1] + v[2];
      const tran_high_t diff12 = v[1] - v[2];
      const tran_high_t diff03 = v[0] - v[3];

      // Rotations; the products are kept in tran_high_t until rounding.
      const tran_high_t even0 = (sum03 + sum12) * cospi_16_64;
      const tran_high_t even1 = (sum03 - sum12) * cospi_16_64;
      const tran_high_t odd0 = diff12 * cospi_24_64 + diff03 * cospi_8_64;
      const tran_high_t odd1 = -diff12 * cospi_8_64 + diff03 * cospi_24_64;

      // Column `col` of the source becomes row `col` of the destination.
      tran_low_t *const row = dst + 4 * col;
      row[0] = (tran_low_t)fdct_round_shift(even0);
      row[1] = (tran_low_t)fdct_round_shift(odd0);
      row[2] = (tran_low_t)fdct_round_shift(even1);
      row[3] = (tran_low_t)fdct_round_shift(odd1);
    }
  }

  // Final scaling of every coefficient: add 1, then shift right by 2.
  for (int k = 0; k < 4 * 4; ++k) {
    output[k] = (output[k] + 1) >> 2;
  }
}
77
aom_fdct4x4_lp_c(const int16_t * input,int16_t * output,int stride)78 void aom_fdct4x4_lp_c(const int16_t *input, int16_t *output, int stride) {
79 // The 2D transform is done with two passes which are actually pretty
80 // similar. In the first one, we transform the columns and transpose
81 // the results. In the second one, we transform the rows. To achieve that,
82 // as the first pass results are transposed, we transpose the columns (that
83 // is the transposed rows) and transpose the results (so that it goes back
84 // in normal/row positions).
85 // We need an intermediate buffer between passes.
86 int16_t intermediate[4 * 4];
87 const int16_t *in_low = NULL;
88 int16_t *out = intermediate;
89 // Do the two transform/transpose passes
90 for (int pass = 0; pass < 2; ++pass) {
91 int32_t in_high[4]; // canbe16
92 int32_t step[4]; // canbe16
93 int32_t temp1, temp2; // needs32
94 for (int i = 0; i < 4; ++i) {
95 // Load inputs.
96 if (pass == 0) {
97 in_high[0] = input[0 * stride] * 16;
98 in_high[1] = input[1 * stride] * 16;
99 in_high[2] = input[2 * stride] * 16;
100 in_high[3] = input[3 * stride] * 16;
101 if (i == 0 && in_high[0]) {
102 ++in_high[0];
103 }
104 } else {
105 assert(in_low != NULL);
106 in_high[0] = in_low[0 * 4];
107 in_high[1] = in_low[1 * 4];
108 in_high[2] = in_low[2 * 4];
109 in_high[3] = in_low[3 * 4];
110 ++in_low;
111 }
112 // Transform.
113 step[0] = in_high[0] + in_high[3];
114 step[1] = in_high[1] + in_high[2];
115 step[2] = in_high[1] - in_high[2];
116 step[3] = in_high[0] - in_high[3];
117 temp1 = (step[0] + step[1]) * (int32_t)cospi_16_64;
118 temp2 = (step[0] - step[1]) * (int32_t)cospi_16_64;
119 out[0] = (int16_t)fdct_round_shift(temp1);
120 out[2] = (int16_t)fdct_round_shift(temp2);
121 temp1 = step[2] * (int32_t)cospi_24_64 + step[3] * (int32_t)cospi_8_64;
122 temp2 = -step[2] * (int32_t)cospi_8_64 + step[3] * (int32_t)cospi_24_64;
123 out[1] = (int16_t)fdct_round_shift(temp1);
124 out[3] = (int16_t)fdct_round_shift(temp2);
125 // Do next column (which is a transposed row in second/horizontal pass)
126 ++input;
127 out += 4;
128 }
129 // Setup in/out for next pass.
130 in_low = intermediate;
131 out = output;
132 }
133
134 for (int i = 0; i < 4; ++i) {
135 for (int j = 0; j < 4; ++j)
136 output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
137 }
138 }
139
// Forward 8x8 DCT, C reference implementation.
//
//   input        - 8x8 block of samples; vertically adjacent samples are
//                  `stride` elements apart.
//   final_output - receives the 64 coefficients, stored with a row pitch
//                  of 8.
//   stride       - element distance between consecutive input rows.
//
// Two passes of a 1D 8-point transform. Each pass walks the columns of its
// source and writes every transformed column as a row of its destination,
// so the data is transposed once per pass.
void aom_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
  // First-pass results (transposed) waiting to be consumed by the second
  // pass.
  tran_low_t intermediate[64];

  for (int pass = 0; pass < 2; ++pass) {
    tran_low_t *const dst = (pass == 0) ? intermediate : final_output;

    for (int col = 0; col < 8; ++col) {
      tran_high_t v[8];

      // Gather the eight samples of the current column.
      if (pass == 0) {
        // Source samples are scaled up by 4 on the way in.
        for (int k = 0; k < 8; ++k) {
          v[k] = input[col + k * stride] * 4;
        }
      } else {
        for (int k = 0; k < 8; ++k) {
          v[k] = intermediate[col + k * 8];
        }
      }

      // Stage 1: mirror-pair sums (s0..s3) and differences (s4..s7).
      const tran_high_t s0 = v[0] + v[7];
      const tran_high_t s1 = v[1] + v[6];
      const tran_high_t s2 = v[2] + v[5];
      const tran_high_t s3 = v[3] + v[4];
      const tran_high_t s4 = v[3] - v[4];
      const tran_high_t s5 = v[2] - v[5];
      const tran_high_t s6 = v[1] - v[6];
      const tran_high_t s7 = v[0] - v[7];

      // Column `col` of the source becomes row `col` of the destination.
      tran_low_t *const row = dst + 8 * col;

      // Even-indexed outputs: a 4-point transform on the sums.
      const tran_high_t e0 = s0 + s3;
      const tran_high_t e1 = s1 + s2;
      const tran_high_t e2 = s1 - s2;
      const tran_high_t e3 = s0 - s3;
      row[0] = (tran_low_t)fdct_round_shift((e0 + e1) * cospi_16_64);
      row[2] = (tran_low_t)fdct_round_shift(e2 * cospi_24_64 + e3 * cospi_8_64);
      row[4] = (tran_low_t)fdct_round_shift((e0 - e1) * cospi_16_64);
      row[6] =
          (tran_low_t)fdct_round_shift(-e2 * cospi_8_64 + e3 * cospi_24_64);

      // Stage 2: rotate the middle differences. The rounded results stay in
      // tran_high_t.
      const tran_high_t r0 = fdct_round_shift((s6 - s5) * cospi_16_64);
      const tran_high_t r1 = fdct_round_shift((s6 + s5) * cospi_16_64);

      // Stage 3: combine with the outer differences.
      const tran_high_t o0 = s4 + r0;
      const tran_high_t o1 = s4 - r0;
      const tran_high_t o2 = s7 - r1;
      const tran_high_t o3 = s7 + r1;

      // Stage 4: odd-indexed outputs.
      row[1] = (tran_low_t)fdct_round_shift(o0 * cospi_28_64 + o3 * cospi_4_64);
      row[3] =
          (tran_low_t)fdct_round_shift(o2 * cospi_12_64 + o1 * -cospi_20_64);
      row[5] =
          (tran_low_t)fdct_round_shift(o1 * cospi_12_64 + o2 * cospi_20_64);
      row[7] =
          (tran_low_t)fdct_round_shift(o3 * cospi_28_64 + o0 * -cospi_4_64);
    }
  }

  // Final scaling: halve every coefficient (integer division).
  for (int k = 0; k < 64; ++k) {
    final_output[k] /= 2;
  }
}
223
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth entry point for the 8x8 forward DCT. It passes its arguments
// straight through to aom_fdct8x8_c() and does no work of its own.
void aom_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
                          int stride) {
  aom_fdct8x8_c(input, final_output, stride);
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
230