1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdlib.h>
13 #include "av1/common/av1_inv_txfm1d.h"
14 #include "av1/common/av1_txfm.h"
15
// 4-point 1-D inverse DCT, computed as a three-stage butterfly network.
//
//   input       - 4 source values; must not be the same pointer as `output`.
//   output      - receives the 4 results and doubles as working storage.
//   cos_bit     - selects the table returned by cospi_arr() and is forwarded
//                 to every half_btf() call.
//   stage_range - per-stage range entries, read at indices 1..3.
//
// Stages 1 and 2 are followed by av1_range_check_buf(); the last stage is
// not range-checked, its sums are passed through clamp_value() instead.
void av1_idct4(const int32_t *input, int32_t *output, int8_t cos_bit,
               const int8_t *stage_range) {
  assert(output != input);
  const int32_t n = 4;
  const int32_t *const cospi = cospi_arr(cos_bit);
  int32_t buf[4];

  // Stage 1: gather the inputs in the order 0, 2, 1, 3.
  output[0] = input[0];
  output[1] = input[2];
  output[2] = input[1];
  output[3] = input[3];
  av1_range_check_buf(1, input, output, n, stage_range[1]);

  // Stage 2 (output -> buf): one half_btf() per result, pairing (0,1) and
  // (2,3).
  buf[0] = half_btf(cospi[32], output[0], cospi[32], output[1], cos_bit);
  buf[1] = half_btf(cospi[32], output[0], -cospi[32], output[1], cos_bit);
  buf[2] = half_btf(cospi[48], output[2], -cospi[16], output[3], cos_bit);
  buf[3] = half_btf(cospi[16], output[2], cospi[48], output[3], cos_bit);
  av1_range_check_buf(2, input, buf, n, stage_range[2]);

  // Stage 3 (buf -> output): mirrored add/sub, element i against 3 - i.
  for (int i = 0; i < 2; ++i) {
    const int j = 3 - i;
    output[i] = clamp_value(buf[i] + buf[j], stage_range[3]);
    output[j] = clamp_value(buf[i] - buf[j], stage_range[3]);
  }
}
56
// 8-point 1-D inverse DCT, computed as a five-stage butterfly network.
//
//   input       - 8 source values; must not be the same pointer as `output`.
//   output      - receives the 8 results and doubles as working storage.
//   cos_bit     - selects the table returned by cospi_arr() and is forwarded
//                 to every half_btf() call.
//   stage_range - per-stage range entries, read at indices 1..5.
//
// Data alternates between `output` and a local scratch array from one stage
// to the next. Stages 1..4 are followed by av1_range_check_buf(); the last
// stage is not.
void av1_idct8(const int32_t *input, int32_t *output, int8_t cos_bit,
               const int8_t *stage_range) {
  assert(output != input);
  // Source index for each slot filled in stage 1.
  static const uint8_t kInputOrder[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
  const int32_t n = 8;
  const int32_t *const cospi = cospi_arr(cos_bit);
  int32_t buf[8];

  // Stage 1: reorder input into output (slots written in ascending order).
  for (int i = 0; i < 8; ++i) output[i] = input[kInputOrder[i]];
  av1_range_check_buf(1, input, output, n, stage_range[1]);

  // Stage 2 (output -> buf): lower half passes through, upper half is
  // combined pairwise (4,7) and (5,6).
  for (int i = 0; i < 4; ++i) buf[i] = output[i];
  buf[4] = half_btf(cospi[56], output[4], -cospi[8], output[7], cos_bit);
  buf[5] = half_btf(cospi[24], output[5], -cospi[40], output[6], cos_bit);
  buf[6] = half_btf(cospi[40], output[5], cospi[24], output[6], cos_bit);
  buf[7] = half_btf(cospi[8], output[4], cospi[56], output[7], cos_bit);
  av1_range_check_buf(2, input, buf, n, stage_range[2]);

  // Stage 3 (buf -> output).
  output[0] = half_btf(cospi[32], buf[0], cospi[32], buf[1], cos_bit);
  output[1] = half_btf(cospi[32], buf[0], -cospi[32], buf[1], cos_bit);
  output[2] = half_btf(cospi[48], buf[2], -cospi[16], buf[3], cos_bit);
  output[3] = half_btf(cospi[16], buf[2], cospi[48], buf[3], cos_bit);
  output[4] = clamp_value(buf[4] + buf[5], stage_range[3]);
  output[5] = clamp_value(buf[4] - buf[5], stage_range[3]);
  output[6] = clamp_value(-buf[6] + buf[7], stage_range[3]);
  output[7] = clamp_value(buf[6] + buf[7], stage_range[3]);
  av1_range_check_buf(3, input, output, n, stage_range[3]);

  // Stage 4 (output -> buf).
  buf[0] = clamp_value(output[0] + output[3], stage_range[4]);
  buf[1] = clamp_value(output[1] + output[2], stage_range[4]);
  buf[2] = clamp_value(output[1] - output[2], stage_range[4]);
  buf[3] = clamp_value(output[0] - output[3], stage_range[4]);
  buf[4] = output[4];
  buf[5] = half_btf(-cospi[32], output[5], cospi[32], output[6], cos_bit);
  buf[6] = half_btf(cospi[32], output[5], cospi[32], output[6], cos_bit);
  buf[7] = output[7];
  av1_range_check_buf(4, input, buf, n, stage_range[4]);

  // Stage 5 (buf -> output): mirrored add/sub, element i against 7 - i.
  for (int i = 0; i < 4; ++i) {
    const int j = 7 - i;
    output[i] = clamp_value(buf[i] + buf[j], stage_range[5]);
    output[j] = clamp_value(buf[i] - buf[j], stage_range[5]);
  }
}
137
// 16-point 1-D inverse DCT, computed as a seven-stage butterfly network.
//
//   input       - 16 source values; must not be the same pointer as `output`.
//   output      - receives the 16 results and doubles as working storage.
//   cos_bit     - selects the table returned by cospi_arr() and is forwarded
//                 to every half_btf() call.
//   stage_range - per-stage range entries, read at indices 1..7.
//
// Data alternates between `output` and a local scratch array from one stage
// to the next. Stages 1..6 are followed by av1_range_check_buf(); the last
// stage is not.
void av1_idct16(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  assert(output != input);
  // Source index for each slot filled in stage 1.
  static const uint8_t kInputOrder[16] = { 0, 8, 4, 12, 2, 10, 6, 14,
                                           1, 9, 5, 13, 3, 11, 7, 15 };
  const int32_t n = 16;
  const int32_t *const cospi = cospi_arr(cos_bit);
  int32_t buf[16];

  // Stage 1: reorder input into output (slots written in ascending order).
  for (int i = 0; i < 16; ++i) output[i] = input[kInputOrder[i]];
  av1_range_check_buf(1, input, output, n, stage_range[1]);

  // Stage 2 (output -> buf): lower half passes through, upper half is
  // combined pairwise (8,15), (9,14), (10,13), (11,12).
  for (int i = 0; i < 8; ++i) buf[i] = output[i];
  buf[8] = half_btf(cospi[60], output[8], -cospi[4], output[15], cos_bit);
  buf[9] = half_btf(cospi[28], output[9], -cospi[36], output[14], cos_bit);
  buf[10] = half_btf(cospi[44], output[10], -cospi[20], output[13], cos_bit);
  buf[11] = half_btf(cospi[12], output[11], -cospi[52], output[12], cos_bit);
  buf[12] = half_btf(cospi[52], output[11], cospi[12], output[12], cos_bit);
  buf[13] = half_btf(cospi[20], output[10], cospi[44], output[13], cos_bit);
  buf[14] = half_btf(cospi[36], output[9], cospi[28], output[14], cos_bit);
  buf[15] = half_btf(cospi[4], output[8], cospi[60], output[15], cos_bit);
  av1_range_check_buf(2, input, buf, n, stage_range[2]);

  // Stage 3 (buf -> output).
  for (int i = 0; i < 4; ++i) output[i] = buf[i];
  output[4] = half_btf(cospi[56], buf[4], -cospi[8], buf[7], cos_bit);
  output[5] = half_btf(cospi[24], buf[5], -cospi[40], buf[6], cos_bit);
  output[6] = half_btf(cospi[40], buf[5], cospi[24], buf[6], cos_bit);
  output[7] = half_btf(cospi[8], buf[4], cospi[56], buf[7], cos_bit);
  // Elements 8..15 in groups of four: (a+b, a-b, -c+d, c+d).
  for (int b = 8; b < 16; b += 4) {
    output[b] = clamp_value(buf[b] + buf[b + 1], stage_range[3]);
    output[b + 1] = clamp_value(buf[b] - buf[b + 1], stage_range[3]);
    output[b + 2] = clamp_value(-buf[b + 2] + buf[b + 3], stage_range[3]);
    output[b + 3] = clamp_value(buf[b + 2] + buf[b + 3], stage_range[3]);
  }
  av1_range_check_buf(3, input, output, n, stage_range[3]);

  // Stage 4 (output -> buf).
  buf[0] = half_btf(cospi[32], output[0], cospi[32], output[1], cos_bit);
  buf[1] = half_btf(cospi[32], output[0], -cospi[32], output[1], cos_bit);
  buf[2] = half_btf(cospi[48], output[2], -cospi[16], output[3], cos_bit);
  buf[3] = half_btf(cospi[16], output[2], cospi[48], output[3], cos_bit);
  buf[4] = clamp_value(output[4] + output[5], stage_range[4]);
  buf[5] = clamp_value(output[4] - output[5], stage_range[4]);
  buf[6] = clamp_value(-output[6] + output[7], stage_range[4]);
  buf[7] = clamp_value(output[6] + output[7], stage_range[4]);
  buf[8] = output[8];
  buf[9] = half_btf(-cospi[16], output[9], cospi[48], output[14], cos_bit);
  buf[10] = half_btf(-cospi[48], output[10], -cospi[16], output[13], cos_bit);
  buf[11] = output[11];
  buf[12] = output[12];
  buf[13] = half_btf(-cospi[16], output[10], cospi[48], output[13], cos_bit);
  buf[14] = half_btf(cospi[48], output[9], cospi[16], output[14], cos_bit);
  buf[15] = output[15];
  av1_range_check_buf(4, input, buf, n, stage_range[4]);

  // Stage 5 (buf -> output).
  output[0] = clamp_value(buf[0] + buf[3], stage_range[5]);
  output[1] = clamp_value(buf[1] + buf[2], stage_range[5]);
  output[2] = clamp_value(buf[1] - buf[2], stage_range[5]);
  output[3] = clamp_value(buf[0] - buf[3], stage_range[5]);
  output[4] = buf[4];
  output[5] = half_btf(-cospi[32], buf[5], cospi[32], buf[6], cos_bit);
  output[6] = half_btf(cospi[32], buf[5], cospi[32], buf[6], cos_bit);
  output[7] = buf[7];
  output[8] = clamp_value(buf[8] + buf[11], stage_range[5]);
  output[9] = clamp_value(buf[9] + buf[10], stage_range[5]);
  output[10] = clamp_value(buf[9] - buf[10], stage_range[5]);
  output[11] = clamp_value(buf[8] - buf[11], stage_range[5]);
  output[12] = clamp_value(-buf[12] + buf[15], stage_range[5]);
  output[13] = clamp_value(-buf[13] + buf[14], stage_range[5]);
  output[14] = clamp_value(buf[13] + buf[14], stage_range[5]);
  output[15] = clamp_value(buf[12] + buf[15], stage_range[5]);
  av1_range_check_buf(5, input, output, n, stage_range[5]);

  // Stage 6 (output -> buf): elements 0..7 are a mirrored add/sub
  // (i against 7 - i); 8..15 mix pass-through and half_btf().
  for (int i = 0; i < 4; ++i) {
    const int j = 7 - i;
    buf[i] = clamp_value(output[i] + output[j], stage_range[6]);
    buf[j] = clamp_value(output[i] - output[j], stage_range[6]);
  }
  buf[8] = output[8];
  buf[9] = output[9];
  buf[10] = half_btf(-cospi[32], output[10], cospi[32], output[13], cos_bit);
  buf[11] = half_btf(-cospi[32], output[11], cospi[32], output[12], cos_bit);
  buf[12] = half_btf(cospi[32], output[11], cospi[32], output[12], cos_bit);
  buf[13] = half_btf(cospi[32], output[10], cospi[32], output[13], cos_bit);
  buf[14] = output[14];
  buf[15] = output[15];
  av1_range_check_buf(6, input, buf, n, stage_range[6]);

  // Stage 7 (buf -> output): mirrored add/sub, element i against 15 - i.
  for (int i = 0; i < 8; ++i) {
    const int j = 15 - i;
    output[i] = clamp_value(buf[i] + buf[j], stage_range[7]);
    output[j] = clamp_value(buf[i] - buf[j], stage_range[7]);
  }
}
302
// 32-point 1-D inverse DCT, computed as a nine-stage butterfly network.
//
//   input       - 32 source values; must not be the same pointer as `output`.
//   output      - receives the 32 results and doubles as working storage.
//   cos_bit     - selects the table returned by cospi_arr() and is forwarded
//                 to every half_btf() call.
//   stage_range - per-stage range entries, read at indices 1..9.
//
// Data alternates between `output` and a local scratch array from one stage
// to the next. Stages 1..8 are followed by av1_range_check_buf(); the last
// stage is not.
void av1_idct32(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  assert(output != input);
  // Source index for each slot filled in stage 1.
  static const uint8_t kInputOrder[32] = {
    0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
    1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31
  };
  const int32_t n = 32;
  const int32_t *const cospi = cospi_arr(cos_bit);
  int32_t buf[32];

  // Stage 1: reorder input into output (slots written in ascending order).
  for (int i = 0; i < 32; ++i) output[i] = input[kInputOrder[i]];
  av1_range_check_buf(1, input, output, n, stage_range[1]);

  // Stage 2 (output -> buf): lower half passes through, upper half is
  // combined pairwise (16,31), (17,30), ..., (23,24).
  for (int i = 0; i < 16; ++i) buf[i] = output[i];
  buf[16] = half_btf(cospi[62], output[16], -cospi[2], output[31], cos_bit);
  buf[17] = half_btf(cospi[30], output[17], -cospi[34], output[30], cos_bit);
  buf[18] = half_btf(cospi[46], output[18], -cospi[18], output[29], cos_bit);
  buf[19] = half_btf(cospi[14], output[19], -cospi[50], output[28], cos_bit);
  buf[20] = half_btf(cospi[54], output[20], -cospi[10], output[27], cos_bit);
  buf[21] = half_btf(cospi[22], output[21], -cospi[42], output[26], cos_bit);
  buf[22] = half_btf(cospi[38], output[22], -cospi[26], output[25], cos_bit);
  buf[23] = half_btf(cospi[6], output[23], -cospi[58], output[24], cos_bit);
  buf[24] = half_btf(cospi[58], output[23], cospi[6], output[24], cos_bit);
  buf[25] = half_btf(cospi[26], output[22], cospi[38], output[25], cos_bit);
  buf[26] = half_btf(cospi[42], output[21], cospi[22], output[26], cos_bit);
  buf[27] = half_btf(cospi[10], output[20], cospi[54], output[27], cos_bit);
  buf[28] = half_btf(cospi[50], output[19], cospi[14], output[28], cos_bit);
  buf[29] = half_btf(cospi[18], output[18], cospi[46], output[29], cos_bit);
  buf[30] = half_btf(cospi[34], output[17], cospi[30], output[30], cos_bit);
  buf[31] = half_btf(cospi[2], output[16], cospi[62], output[31], cos_bit);
  av1_range_check_buf(2, input, buf, n, stage_range[2]);

  // Stage 3 (buf -> output).
  for (int i = 0; i < 8; ++i) output[i] = buf[i];
  output[8] = half_btf(cospi[60], buf[8], -cospi[4], buf[15], cos_bit);
  output[9] = half_btf(cospi[28], buf[9], -cospi[36], buf[14], cos_bit);
  output[10] = half_btf(cospi[44], buf[10], -cospi[20], buf[13], cos_bit);
  output[11] = half_btf(cospi[12], buf[11], -cospi[52], buf[12], cos_bit);
  output[12] = half_btf(cospi[52], buf[11], cospi[12], buf[12], cos_bit);
  output[13] = half_btf(cospi[20], buf[10], cospi[44], buf[13], cos_bit);
  output[14] = half_btf(cospi[36], buf[9], cospi[28], buf[14], cos_bit);
  output[15] = half_btf(cospi[4], buf[8], cospi[60], buf[15], cos_bit);
  // Elements 16..31 in groups of four: (a+b, a-b, -c+d, c+d).
  for (int b = 16; b < 32; b += 4) {
    output[b] = clamp_value(buf[b] + buf[b + 1], stage_range[3]);
    output[b + 1] = clamp_value(buf[b] - buf[b + 1], stage_range[3]);
    output[b + 2] = clamp_value(-buf[b + 2] + buf[b + 3], stage_range[3]);
    output[b + 3] = clamp_value(buf[b + 2] + buf[b + 3], stage_range[3]);
  }
  av1_range_check_buf(3, input, output, n, stage_range[3]);

  // Stage 4 (output -> buf).
  for (int i = 0; i < 4; ++i) buf[i] = output[i];
  buf[4] = half_btf(cospi[56], output[4], -cospi[8], output[7], cos_bit);
  buf[5] = half_btf(cospi[24], output[5], -cospi[40], output[6], cos_bit);
  buf[6] = half_btf(cospi[40], output[5], cospi[24], output[6], cos_bit);
  buf[7] = half_btf(cospi[8], output[4], cospi[56], output[7], cos_bit);
  // Elements 8..15 in groups of four: (a+b, a-b, -c+d, c+d).
  for (int b = 8; b < 16; b += 4) {
    buf[b] = clamp_value(output[b] + output[b + 1], stage_range[4]);
    buf[b + 1] = clamp_value(output[b] - output[b + 1], stage_range[4]);
    buf[b + 2] = clamp_value(-output[b + 2] + output[b + 3], stage_range[4]);
    buf[b + 3] = clamp_value(output[b + 2] + output[b + 3], stage_range[4]);
  }
  buf[16] = output[16];
  buf[17] = half_btf(-cospi[8], output[17], cospi[56], output[30], cos_bit);
  buf[18] = half_btf(-cospi[56], output[18], -cospi[8], output[29], cos_bit);
  buf[19] = output[19];
  buf[20] = output[20];
  buf[21] = half_btf(-cospi[40], output[21], cospi[24], output[26], cos_bit);
  buf[22] = half_btf(-cospi[24], output[22], -cospi[40], output[25], cos_bit);
  buf[23] = output[23];
  buf[24] = output[24];
  buf[25] = half_btf(-cospi[40], output[22], cospi[24], output[25], cos_bit);
  buf[26] = half_btf(cospi[24], output[21], cospi[40], output[26], cos_bit);
  buf[27] = output[27];
  buf[28] = output[28];
  buf[29] = half_btf(-cospi[8], output[18], cospi[56], output[29], cos_bit);
  buf[30] = half_btf(cospi[56], output[17], cospi[8], output[30], cos_bit);
  buf[31] = output[31];
  av1_range_check_buf(4, input, buf, n, stage_range[4]);

  // Stage 5 (buf -> output).
  output[0] = half_btf(cospi[32], buf[0], cospi[32], buf[1], cos_bit);
  output[1] = half_btf(cospi[32], buf[0], -cospi[32], buf[1], cos_bit);
  output[2] = half_btf(cospi[48], buf[2], -cospi[16], buf[3], cos_bit);
  output[3] = half_btf(cospi[16], buf[2], cospi[48], buf[3], cos_bit);
  output[4] = clamp_value(buf[4] + buf[5], stage_range[5]);
  output[5] = clamp_value(buf[4] - buf[5], stage_range[5]);
  output[6] = clamp_value(-buf[6] + buf[7], stage_range[5]);
  output[7] = clamp_value(buf[6] + buf[7], stage_range[5]);
  output[8] = buf[8];
  output[9] = half_btf(-cospi[16], buf[9], cospi[48], buf[14], cos_bit);
  output[10] = half_btf(-cospi[48], buf[10], -cospi[16], buf[13], cos_bit);
  output[11] = buf[11];
  output[12] = buf[12];
  output[13] = half_btf(-cospi[16], buf[10], cospi[48], buf[13], cos_bit);
  output[14] = half_btf(cospi[48], buf[9], cospi[16], buf[14], cos_bit);
  output[15] = buf[15];
  // Elements 16..31 in groups of eight: the first four are a mirrored
  // add/sub, the last four the same shape with the leading operand negated.
  for (int b = 16; b < 32; b += 8) {
    output[b] = clamp_value(buf[b] + buf[b + 3], stage_range[5]);
    output[b + 1] = clamp_value(buf[b + 1] + buf[b + 2], stage_range[5]);
    output[b + 2] = clamp_value(buf[b + 1] - buf[b + 2], stage_range[5]);
    output[b + 3] = clamp_value(buf[b] - buf[b + 3], stage_range[5]);
    output[b + 4] = clamp_value(-buf[b + 4] + buf[b + 7], stage_range[5]);
    output[b + 5] = clamp_value(-buf[b + 5] + buf[b + 6], stage_range[5]);
    output[b + 6] = clamp_value(buf[b + 5] + buf[b + 6], stage_range[5]);
    output[b + 7] = clamp_value(buf[b + 4] + buf[b + 7], stage_range[5]);
  }
  av1_range_check_buf(5, input, output, n, stage_range[5]);

  // Stage 6 (output -> buf).
  buf[0] = clamp_value(output[0] + output[3], stage_range[6]);
  buf[1] = clamp_value(output[1] + output[2], stage_range[6]);
  buf[2] = clamp_value(output[1] - output[2], stage_range[6]);
  buf[3] = clamp_value(output[0] - output[3], stage_range[6]);
  buf[4] = output[4];
  buf[5] = half_btf(-cospi[32], output[5], cospi[32], output[6], cos_bit);
  buf[6] = half_btf(cospi[32], output[5], cospi[32], output[6], cos_bit);
  buf[7] = output[7];
  buf[8] = clamp_value(output[8] + output[11], stage_range[6]);
  buf[9] = clamp_value(output[9] + output[10], stage_range[6]);
  buf[10] = clamp_value(output[9] - output[10], stage_range[6]);
  buf[11] = clamp_value(output[8] - output[11], stage_range[6]);
  buf[12] = clamp_value(-output[12] + output[15], stage_range[6]);
  buf[13] = clamp_value(-output[13] + output[14], stage_range[6]);
  buf[14] = clamp_value(output[13] + output[14], stage_range[6]);
  buf[15] = clamp_value(output[12] + output[15], stage_range[6]);
  buf[16] = output[16];
  buf[17] = output[17];
  buf[18] = half_btf(-cospi[16], output[18], cospi[48], output[29], cos_bit);
  buf[19] = half_btf(-cospi[16], output[19], cospi[48], output[28], cos_bit);
  buf[20] = half_btf(-cospi[48], output[20], -cospi[16], output[27], cos_bit);
  buf[21] = half_btf(-cospi[48], output[21], -cospi[16], output[26], cos_bit);
  buf[22] = output[22];
  buf[23] = output[23];
  buf[24] = output[24];
  buf[25] = output[25];
  buf[26] = half_btf(-cospi[16], output[21], cospi[48], output[26], cos_bit);
  buf[27] = half_btf(-cospi[16], output[20], cospi[48], output[27], cos_bit);
  buf[28] = half_btf(cospi[48], output[19], cospi[16], output[28], cos_bit);
  buf[29] = half_btf(cospi[48], output[18], cospi[16], output[29], cos_bit);
  buf[30] = output[30];
  buf[31] = output[31];
  av1_range_check_buf(6, input, buf, n, stage_range[6]);

  // Stage 7 (buf -> output).
  // Elements 0..7: mirrored add/sub, i against 7 - i.
  for (int i = 0; i < 4; ++i) {
    const int j = 7 - i;
    output[i] = clamp_value(buf[i] + buf[j], stage_range[7]);
    output[j] = clamp_value(buf[i] - buf[j], stage_range[7]);
  }
  output[8] = buf[8];
  output[9] = buf[9];
  output[10] = half_btf(-cospi[32], buf[10], cospi[32], buf[13], cos_bit);
  output[11] = half_btf(-cospi[32], buf[11], cospi[32], buf[12], cos_bit);
  output[12] = half_btf(cospi[32], buf[11], cospi[32], buf[12], cos_bit);
  output[13] = half_btf(cospi[32], buf[10], cospi[32], buf[13], cos_bit);
  output[14] = buf[14];
  output[15] = buf[15];
  // Elements 16..23: mirrored add/sub, lo against its partner in 23..20.
  for (int i = 0; i < 4; ++i) {
    const int lo = 16 + i;
    const int hi = 23 - i;
    output[lo] = clamp_value(buf[lo] + buf[hi], stage_range[7]);
    output[hi] = clamp_value(buf[lo] - buf[hi], stage_range[7]);
  }
  // Elements 24..31: same pairing, but the low slot gets -lo + hi and the
  // high slot gets lo + hi.
  for (int i = 0; i < 4; ++i) {
    const int lo = 24 + i;
    const int hi = 31 - i;
    output[lo] = clamp_value(-buf[lo] + buf[hi], stage_range[7]);
    output[hi] = clamp_value(buf[lo] + buf[hi], stage_range[7]);
  }
  av1_range_check_buf(7, input, output, n, stage_range[7]);

  // Stage 8 (output -> buf).
  // Elements 0..15: mirrored add/sub, i against 15 - i.
  for (int i = 0; i < 8; ++i) {
    const int j = 15 - i;
    buf[i] = clamp_value(output[i] + output[j], stage_range[8]);
    buf[j] = clamp_value(output[i] - output[j], stage_range[8]);
  }
  for (int i = 16; i < 20; ++i) buf[i] = output[i];
  // Elements 20..27: each pair (lo, 47 - lo) goes through half_btf() twice,
  // once with the first weight negated.
  for (int lo = 20; lo < 24; ++lo) {
    const int hi = 47 - lo;
    buf[lo] = half_btf(-cospi[32], output[lo], cospi[32], output[hi], cos_bit);
    buf[hi] = half_btf(cospi[32], output[lo], cospi[32], output[hi], cos_bit);
  }
  for (int i = 28; i < 32; ++i) buf[i] = output[i];
  av1_range_check_buf(8, input, buf, n, stage_range[8]);

  // Stage 9 (buf -> output): mirrored add/sub, element i against 31 - i.
  for (int i = 0; i < 16; ++i) {
    const int j = 31 - i;
    output[i] = clamp_value(buf[i] + buf[j], stage_range[9]);
    output[j] = clamp_value(buf[i] - buf[j], stage_range[9]);
  }
}
655
// 4-point 1-D inverse ADST.
//
//   input       - 4 source values; all four are read into locals before any
//                 write to `output`.
//   output      - receives the 4 results.
//   cos_bit     - selects the table returned by sinpi_arr() and is the shift
//                 amount given to round_shift() for the final results.
//   stage_range - per-stage range entries, read at indices 1..6.
//
// If all four inputs are zero the outputs are set to zero and the function
// returns early. Otherwise every intermediate value is passed through
// range_check_value(); all stages except stage 2 add `cos_bit` to the range
// entry for that call.
void av1_iadst4(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  const int bit = cos_bit;
  const int32_t *const sinpi = sinpi_arr(bit);

  const int32_t in0 = input[0];
  const int32_t in1 = input[1];
  const int32_t in2 = input[2];
  const int32_t in3 = input[3];

  // All-zero input: nothing to compute.
  if ((in0 | in1 | in2 | in3) == 0) {
    output[3] = 0;
    output[2] = 0;
    output[1] = 0;
    output[0] = 0;
    return;
  }

  assert(sinpi[1] + sinpi[2] == sinpi[4]);

  // Stage 1: products of the inputs with sinpi[] entries.
  const int32_t p1_in0 = range_check_value(sinpi[1] * in0, stage_range[1] + bit);
  const int32_t p2_in0 = range_check_value(sinpi[2] * in0, stage_range[1] + bit);
  const int32_t p3_in1 = range_check_value(sinpi[3] * in1, stage_range[1] + bit);
  const int32_t p4_in2 = range_check_value(sinpi[4] * in2, stage_range[1] + bit);
  const int32_t p1_in2 = range_check_value(sinpi[1] * in2, stage_range[1] + bit);
  const int32_t p2_in3 = range_check_value(sinpi[2] * in3, stage_range[1] + bit);
  const int32_t p4_in3 = range_check_value(sinpi[4] * in3, stage_range[1] + bit);

  // Stage 2
  // NOTICE: (in0 - in2) here may use one extra bit compared to the
  // opt_range_row/col specified in av1_gen_inv_stage_range()
  const int32_t mix = range_check_value((in0 - in2) + in3, stage_range[2]);

  // Stage 3
  const int32_t acc0_s3 =
      range_check_value(p1_in0 + p4_in2, stage_range[3] + bit);
  const int32_t acc1_s3 =
      range_check_value(p2_in0 - p1_in2, stage_range[3] + bit);
  const int32_t mid = range_check_value(p3_in1, stage_range[3] + bit);
  const int32_t p3_mix = range_check_value(sinpi[3] * mix, stage_range[3] + bit);

  // Stage 4
  const int32_t acc0 = range_check_value(acc0_s3 + p2_in3, stage_range[4] + bit);
  const int32_t acc1 = range_check_value(acc1_s3 - p4_in3, stage_range[4] + bit);

  // Stage 5
  const int32_t y0 = range_check_value(acc0 + mid, stage_range[5] + bit);
  const int32_t y1 = range_check_value(acc1 + mid, stage_range[5] + bit);
  const int32_t y2 = range_check_value(p3_mix, stage_range[5] + bit);
  const int32_t y3_s5 = range_check_value(acc0 + acc1, stage_range[5] + bit);

  // Stage 6
  const int32_t y3 = range_check_value(y3_s5 - mid, stage_range[6] + bit);

  // Scale the results back down by `bit`.
  output[0] = round_shift(y0, bit);
  output[1] = round_shift(y1, bit);
  output[2] = round_shift(y2, bit);
  output[3] = round_shift(y3, bit);
}
712
// 8-point inverse ADST (per the function name), evaluated as a seven-stage
// butterfly network that ping-pongs between `output` and a local scratch
// array.
//
//   input       - 8 source coefficients; must not be the same pointer as
//                 `output` (asserted).
//   output      - receives the 8 results; also used as working storage.
//   cos_bit     - selects the cosine table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - stage_range[s] is the bit width handed to clamp_value() and
//                 av1_range_check_buf() for stage s (entries 1..6 are read).
void av1_iadst8(const int32_t *input, int32_t *output, int8_t cos_bit,
                const int8_t *stage_range) {
  assert(output != input);

  enum { kLen = 8 };
  // Stage 1 gathers input[kGather[i]] into slot i.
  static const int8_t kGather[kLen] = { 7, 0, 5, 2, 3, 4, 1, 6 };
  // Stage 7 scatters scratch[kScatter[i]] into slot i; odd slots are negated.
  static const int8_t kScatter[kLen] = { 0, 4, 6, 2, 3, 7, 5, 1 };

  const int32_t *const cospi = cospi_arr(cos_bit);
  const int32_t c16 = cospi[16];
  const int32_t c32 = cospi[32];
  const int32_t c48 = cospi[48];

  int32_t scratch[kLen];
  int32_t stage = 0;
  int8_t bits;

  // stage 1: input permutation.
  ++stage;
  for (int i = 0; i < kLen; ++i) {
    output[i] = input[kGather[i]];
  }
  av1_range_check_buf(stage, input, output, kLen, stage_range[stage]);

  // stage 2: one rotation per adjacent pair (2k, 2k + 1); the table indices
  // walk 4/60, 20/44, 36/28, 52/12.
  ++stage;
  for (int k = 0; k < kLen / 2; ++k) {
    const int32_t wa = cospi[4 + 16 * k];
    const int32_t wb = cospi[60 - 16 * k];
    const int32_t even = output[2 * k];
    const int32_t odd = output[2 * k + 1];
    scratch[2 * k] = half_btf(wa, even, wb, odd, cos_bit);
    scratch[2 * k + 1] = half_btf(wb, even, -wa, odd, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 3: clamped sum/difference between the lower and upper halves.
  ++stage;
  bits = stage_range[stage];
  for (int i = 0; i < 4; ++i) {
    const int32_t a = scratch[i];
    const int32_t b = scratch[i + 4];
    output[i] = clamp_value(a + b, bits);
    output[i + 4] = clamp_value(a - b, bits);
  }
  av1_range_check_buf(stage, input, output, kLen, bits);

  // stage 4: lower half passes through, upper half is rotated by 16/48.
  ++stage;
  for (int i = 0; i < 4; ++i) {
    scratch[i] = output[i];
  }
  scratch[4] = half_btf(c16, output[4], c48, output[5], cos_bit);
  scratch[5] = half_btf(c48, output[4], -c16, output[5], cos_bit);
  scratch[6] = half_btf(-c48, output[6], c16, output[7], cos_bit);
  scratch[7] = half_btf(c16, output[6], c48, output[7], cos_bit);
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 5: clamped sum/difference at distance 2 inside each group of 4.
  ++stage;
  bits = stage_range[stage];
  for (int base = 0; base < kLen; base += 4) {
    for (int i = base; i < base + 2; ++i) {
      const int32_t a = scratch[i];
      const int32_t b = scratch[i + 2];
      output[i] = clamp_value(a + b, bits);
      output[i + 2] = clamp_value(a - b, bits);
    }
  }
  av1_range_check_buf(stage, input, output, kLen, bits);

  // stage 6: in each group of 4 the first two entries pass through and the
  // last two are combined with cospi[32].
  ++stage;
  for (int base = 0; base < kLen; base += 4) {
    const int32_t p = output[base + 2];
    const int32_t q = output[base + 3];
    scratch[base] = output[base];
    scratch[base + 1] = output[base + 1];
    scratch[base + 2] = half_btf(c32, p, c32, q, cos_bit);
    scratch[base + 3] = half_btf(c32, p, -c32, q, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 7: final reorder with alternating sign; there is no range check
  // after this stage.
  for (int i = 0; i < kLen; ++i) {
    const int32_t v = scratch[kScatter[i]];
    output[i] = (i & 1) ? -v : v;
  }
}
820
// 16-point inverse ADST (per the function name), evaluated as a nine-stage
// butterfly network that ping-pongs between `output` and a local scratch
// array.
//
//   input       - 16 source coefficients; must not be the same pointer as
//                 `output` (asserted).
//   output      - receives the 16 results; also used as working storage.
//   cos_bit     - selects the cosine table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - stage_range[s] is the bit width handed to clamp_value() and
//                 av1_range_check_buf() for stage s (entries 1..8 are read).
void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                 const int8_t *stage_range) {
  assert(output != input);

  enum { kLen = 16 };
  // Stage 1 gathers input[kGather[i]] into slot i.
  static const int8_t kGather[kLen] = { 15, 0, 13, 2, 11, 4, 9, 6,
                                        7,  8, 5,  10, 3, 12, 1, 14 };
  // Stage 9 scatters scratch[kScatter[i]] into slot i; odd slots are negated.
  static const int8_t kScatter[kLen] = { 0, 8,  12, 4, 6, 14, 10, 2,
                                         3, 11, 15, 7, 5, 13, 9,  1 };

  const int32_t *const cospi = cospi_arr(cos_bit);
  const int32_t c8 = cospi[8];
  const int32_t c16 = cospi[16];
  const int32_t c24 = cospi[24];
  const int32_t c32 = cospi[32];
  const int32_t c40 = cospi[40];
  const int32_t c48 = cospi[48];
  const int32_t c56 = cospi[56];

  int32_t scratch[kLen];
  int32_t stage = 0;
  int8_t bits;

  // stage 1: input permutation.
  ++stage;
  for (int i = 0; i < kLen; ++i) {
    output[i] = input[kGather[i]];
  }
  av1_range_check_buf(stage, input, output, kLen, stage_range[stage]);

  // stage 2: one rotation per adjacent pair (2k, 2k + 1); the table indices
  // walk 2/62, 10/54, 18/46, ... , 58/6.
  ++stage;
  for (int k = 0; k < kLen / 2; ++k) {
    const int32_t wa = cospi[2 + 8 * k];
    const int32_t wb = cospi[62 - 8 * k];
    const int32_t even = output[2 * k];
    const int32_t odd = output[2 * k + 1];
    scratch[2 * k] = half_btf(wa, even, wb, odd, cos_bit);
    scratch[2 * k + 1] = half_btf(wb, even, -wa, odd, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 3: clamped sum/difference between the lower and upper halves.
  ++stage;
  bits = stage_range[stage];
  for (int i = 0; i < 8; ++i) {
    const int32_t a = scratch[i];
    const int32_t b = scratch[i + 8];
    output[i] = clamp_value(a + b, bits);
    output[i + 8] = clamp_value(a - b, bits);
  }
  av1_range_check_buf(stage, input, output, kLen, bits);

  // stage 4: lower half passes through, upper half is rotated by 8/56 and
  // 40/24.
  ++stage;
  for (int i = 0; i < 8; ++i) {
    scratch[i] = output[i];
  }
  scratch[8] = half_btf(c8, output[8], c56, output[9], cos_bit);
  scratch[9] = half_btf(c56, output[8], -c8, output[9], cos_bit);
  scratch[10] = half_btf(c40, output[10], c24, output[11], cos_bit);
  scratch[11] = half_btf(c24, output[10], -c40, output[11], cos_bit);
  scratch[12] = half_btf(-c56, output[12], c8, output[13], cos_bit);
  scratch[13] = half_btf(c8, output[12], c56, output[13], cos_bit);
  scratch[14] = half_btf(-c24, output[14], c40, output[15], cos_bit);
  scratch[15] = half_btf(c40, output[14], c24, output[15], cos_bit);
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 5: clamped sum/difference at distance 4 inside each group of 8.
  ++stage;
  bits = stage_range[stage];
  for (int base = 0; base < kLen; base += 8) {
    for (int i = base; i < base + 4; ++i) {
      const int32_t a = scratch[i];
      const int32_t b = scratch[i + 4];
      output[i] = clamp_value(a + b, bits);
      output[i + 4] = clamp_value(a - b, bits);
    }
  }
  av1_range_check_buf(stage, input, output, kLen, bits);

  // stage 6: in each group of 8 the first four entries pass through and the
  // last four are rotated by 16/48.
  ++stage;
  for (int base = 0; base < kLen; base += 8) {
    const int32_t p4 = output[base + 4];
    const int32_t p5 = output[base + 5];
    const int32_t p6 = output[base + 6];
    const int32_t p7 = output[base + 7];
    for (int i = base; i < base + 4; ++i) {
      scratch[i] = output[i];
    }
    scratch[base + 4] = half_btf(c16, p4, c48, p5, cos_bit);
    scratch[base + 5] = half_btf(c48, p4, -c16, p5, cos_bit);
    scratch[base + 6] = half_btf(-c48, p6, c16, p7, cos_bit);
    scratch[base + 7] = half_btf(c16, p6, c48, p7, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 7: clamped sum/difference at distance 2 inside each group of 4.
  ++stage;
  bits = stage_range[stage];
  for (int base = 0; base < kLen; base += 4) {
    for (int i = base; i < base + 2; ++i) {
      const int32_t a = scratch[i];
      const int32_t b = scratch[i + 2];
      output[i] = clamp_value(a + b, bits);
      output[i + 2] = clamp_value(a - b, bits);
    }
  }
  av1_range_check_buf(stage, input, output, kLen, bits);

  // stage 8: in each group of 4 the first two entries pass through and the
  // last two are combined with cospi[32].
  ++stage;
  for (int base = 0; base < kLen; base += 4) {
    const int32_t p = output[base + 2];
    const int32_t q = output[base + 3];
    scratch[base] = output[base];
    scratch[base + 1] = output[base + 1];
    scratch[base + 2] = half_btf(c32, p, c32, q, cos_bit);
    scratch[base + 3] = half_btf(c32, p, -c32, q, cos_bit);
  }
  av1_range_check_buf(stage, input, scratch, kLen, stage_range[stage]);

  // stage 9: final reorder with alternating sign; there is no range check
  // after this stage.
  for (int i = 0; i < kLen; ++i) {
    const int32_t v = scratch[kScatter[i]];
    output[i] = (i & 1) ? -v : v;
  }
}
1028
av1_iidentity4_c(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1029 void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
1030 const int8_t *stage_range) {
1031 (void)cos_bit;
1032 (void)stage_range;
1033 for (int i = 0; i < 4; ++i) {
1034 output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
1035 }
1036 assert(stage_range[0] + NewSqrt2Bits <= 32);
1037 }
1038
// 8-point identity transform: each output is twice the matching input. The
// product is formed in 64 bits and then converted back to int32_t.
// cos_bit and stage_range are unused.
void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  const int32_t *src = input;
  int32_t *dst = output;
  for (int remaining = 8; remaining > 0; --remaining) {
    const int64_t doubled = (int64_t)(*src++) * 2;
    *dst++ = (int32_t)doubled;
  }
}
1045
av1_iidentity16_c(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1046 void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
1047 const int8_t *stage_range) {
1048 (void)cos_bit;
1049 (void)stage_range;
1050 for (int i = 0; i < 16; ++i)
1051 output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
1052 assert(stage_range[0] + NewSqrt2Bits <= 32);
1053 }
1054
// 32-point identity transform: each output is four times the matching input.
// The product is formed in 64 bits and then converted back to int32_t.
// cos_bit and stage_range are unused.
void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  const int32_t *src = input;
  int32_t *dst = output;
  for (int remaining = 32; remaining > 0; --remaining) {
    const int64_t quadrupled = (int64_t)(*src++) * 4;
    *dst++ = (int32_t)quadrupled;
  }
}
1061
av1_idct64(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1062 void av1_idct64(const int32_t *input, int32_t *output, int8_t cos_bit,
1063 const int8_t *stage_range) {
1064 assert(output != input);
1065 const int32_t size = 64;
1066 const int32_t *cospi = cospi_arr(cos_bit);
1067
1068 int32_t stage = 0;
1069 int32_t *bf0, *bf1;
1070 int32_t step[64];
1071
1072 // stage 0;
1073
1074 // stage 1;
1075 stage++;
1076 bf1 = output;
1077 bf1[0] = input[0];
1078 bf1[1] = input[32];
1079 bf1[2] = input[16];
1080 bf1[3] = input[48];
1081 bf1[4] = input[8];
1082 bf1[5] = input[40];
1083 bf1[6] = input[24];
1084 bf1[7] = input[56];
1085 bf1[8] = input[4];
1086 bf1[9] = input[36];
1087 bf1[10] = input[20];
1088 bf1[11] = input[52];
1089 bf1[12] = input[12];
1090 bf1[13] = input[44];
1091 bf1[14] = input[28];
1092 bf1[15] = input[60];
1093 bf1[16] = input[2];
1094 bf1[17] = input[34];
1095 bf1[18] = input[18];
1096 bf1[19] = input[50];
1097 bf1[20] = input[10];
1098 bf1[21] = input[42];
1099 bf1[22] = input[26];
1100 bf1[23] = input[58];
1101 bf1[24] = input[6];
1102 bf1[25] = input[38];
1103 bf1[26] = input[22];
1104 bf1[27] = input[54];
1105 bf1[28] = input[14];
1106 bf1[29] = input[46];
1107 bf1[30] = input[30];
1108 bf1[31] = input[62];
1109 bf1[32] = input[1];
1110 bf1[33] = input[33];
1111 bf1[34] = input[17];
1112 bf1[35] = input[49];
1113 bf1[36] = input[9];
1114 bf1[37] = input[41];
1115 bf1[38] = input[25];
1116 bf1[39] = input[57];
1117 bf1[40] = input[5];
1118 bf1[41] = input[37];
1119 bf1[42] = input[21];
1120 bf1[43] = input[53];
1121 bf1[44] = input[13];
1122 bf1[45] = input[45];
1123 bf1[46] = input[29];
1124 bf1[47] = input[61];
1125 bf1[48] = input[3];
1126 bf1[49] = input[35];
1127 bf1[50] = input[19];
1128 bf1[51] = input[51];
1129 bf1[52] = input[11];
1130 bf1[53] = input[43];
1131 bf1[54] = input[27];
1132 bf1[55] = input[59];
1133 bf1[56] = input[7];
1134 bf1[57] = input[39];
1135 bf1[58] = input[23];
1136 bf1[59] = input[55];
1137 bf1[60] = input[15];
1138 bf1[61] = input[47];
1139 bf1[62] = input[31];
1140 bf1[63] = input[63];
1141 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1142
1143 // stage 2
1144 stage++;
1145 bf0 = output;
1146 bf1 = step;
1147 bf1[0] = bf0[0];
1148 bf1[1] = bf0[1];
1149 bf1[2] = bf0[2];
1150 bf1[3] = bf0[3];
1151 bf1[4] = bf0[4];
1152 bf1[5] = bf0[5];
1153 bf1[6] = bf0[6];
1154 bf1[7] = bf0[7];
1155 bf1[8] = bf0[8];
1156 bf1[9] = bf0[9];
1157 bf1[10] = bf0[10];
1158 bf1[11] = bf0[11];
1159 bf1[12] = bf0[12];
1160 bf1[13] = bf0[13];
1161 bf1[14] = bf0[14];
1162 bf1[15] = bf0[15];
1163 bf1[16] = bf0[16];
1164 bf1[17] = bf0[17];
1165 bf1[18] = bf0[18];
1166 bf1[19] = bf0[19];
1167 bf1[20] = bf0[20];
1168 bf1[21] = bf0[21];
1169 bf1[22] = bf0[22];
1170 bf1[23] = bf0[23];
1171 bf1[24] = bf0[24];
1172 bf1[25] = bf0[25];
1173 bf1[26] = bf0[26];
1174 bf1[27] = bf0[27];
1175 bf1[28] = bf0[28];
1176 bf1[29] = bf0[29];
1177 bf1[30] = bf0[30];
1178 bf1[31] = bf0[31];
1179 bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit);
1180 bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit);
1181 bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit);
1182 bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit);
1183 bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit);
1184 bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit);
1185 bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit);
1186 bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit);
1187 bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit);
1188 bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit);
1189 bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit);
1190 bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit);
1191 bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit);
1192 bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit);
1193 bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit);
1194 bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit);
1195 bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit);
1196 bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit);
1197 bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit);
1198 bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit);
1199 bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit);
1200 bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit);
1201 bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit);
1202 bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit);
1203 bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit);
1204 bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit);
1205 bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit);
1206 bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit);
1207 bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit);
1208 bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit);
1209 bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit);
1210 bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit);
1211 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1212
1213 // stage 3
1214 stage++;
1215 bf0 = step;
1216 bf1 = output;
1217 bf1[0] = bf0[0];
1218 bf1[1] = bf0[1];
1219 bf1[2] = bf0[2];
1220 bf1[3] = bf0[3];
1221 bf1[4] = bf0[4];
1222 bf1[5] = bf0[5];
1223 bf1[6] = bf0[6];
1224 bf1[7] = bf0[7];
1225 bf1[8] = bf0[8];
1226 bf1[9] = bf0[9];
1227 bf1[10] = bf0[10];
1228 bf1[11] = bf0[11];
1229 bf1[12] = bf0[12];
1230 bf1[13] = bf0[13];
1231 bf1[14] = bf0[14];
1232 bf1[15] = bf0[15];
1233 bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
1234 bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
1235 bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
1236 bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
1237 bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
1238 bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
1239 bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
1240 bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
1241 bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
1242 bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
1243 bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
1244 bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
1245 bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
1246 bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
1247 bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
1248 bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
1249 bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]);
1250 bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]);
1251 bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]);
1252 bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]);
1253 bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]);
1254 bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]);
1255 bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]);
1256 bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]);
1257 bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]);
1258 bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]);
1259 bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]);
1260 bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]);
1261 bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]);
1262 bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]);
1263 bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]);
1264 bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]);
1265 bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]);
1266 bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]);
1267 bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]);
1268 bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]);
1269 bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]);
1270 bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]);
1271 bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]);
1272 bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]);
1273 bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]);
1274 bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]);
1275 bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]);
1276 bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]);
1277 bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]);
1278 bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]);
1279 bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]);
1280 bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]);
1281 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1282
1283 // stage 4
1284 stage++;
1285 bf0 = output;
1286 bf1 = step;
1287 bf1[0] = bf0[0];
1288 bf1[1] = bf0[1];
1289 bf1[2] = bf0[2];
1290 bf1[3] = bf0[3];
1291 bf1[4] = bf0[4];
1292 bf1[5] = bf0[5];
1293 bf1[6] = bf0[6];
1294 bf1[7] = bf0[7];
1295 bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
1296 bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
1297 bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
1298 bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
1299 bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
1300 bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
1301 bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
1302 bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
1303 bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
1304 bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
1305 bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
1306 bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
1307 bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
1308 bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
1309 bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
1310 bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
1311 bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
1312 bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
1313 bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
1314 bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
1315 bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
1316 bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
1317 bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
1318 bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
1319 bf1[32] = bf0[32];
1320 bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
1321 bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
1322 bf1[35] = bf0[35];
1323 bf1[36] = bf0[36];
1324 bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
1325 bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
1326 bf1[39] = bf0[39];
1327 bf1[40] = bf0[40];
1328 bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
1329 bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
1330 bf1[43] = bf0[43];
1331 bf1[44] = bf0[44];
1332 bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
1333 bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
1334 bf1[47] = bf0[47];
1335 bf1[48] = bf0[48];
1336 bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit);
1337 bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit);
1338 bf1[51] = bf0[51];
1339 bf1[52] = bf0[52];
1340 bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit);
1341 bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit);
1342 bf1[55] = bf0[55];
1343 bf1[56] = bf0[56];
1344 bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit);
1345 bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit);
1346 bf1[59] = bf0[59];
1347 bf1[60] = bf0[60];
1348 bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit);
1349 bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit);
1350 bf1[63] = bf0[63];
1351 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1352
1353 // stage 5
1354 stage++;
1355 bf0 = step;
1356 bf1 = output;
1357 bf1[0] = bf0[0];
1358 bf1[1] = bf0[1];
1359 bf1[2] = bf0[2];
1360 bf1[3] = bf0[3];
1361 bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
1362 bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
1363 bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
1364 bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
1365 bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
1366 bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
1367 bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
1368 bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
1369 bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
1370 bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
1371 bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
1372 bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
1373 bf1[16] = bf0[16];
1374 bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
1375 bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
1376 bf1[19] = bf0[19];
1377 bf1[20] = bf0[20];
1378 bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
1379 bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
1380 bf1[23] = bf0[23];
1381 bf1[24] = bf0[24];
1382 bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
1383 bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
1384 bf1[27] = bf0[27];
1385 bf1[28] = bf0[28];
1386 bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
1387 bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
1388 bf1[31] = bf0[31];
1389 bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]);
1390 bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]);
1391 bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]);
1392 bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]);
1393 bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]);
1394 bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]);
1395 bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]);
1396 bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]);
1397 bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]);
1398 bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]);
1399 bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]);
1400 bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]);
1401 bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]);
1402 bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]);
1403 bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]);
1404 bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]);
1405 bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]);
1406 bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]);
1407 bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]);
1408 bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]);
1409 bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]);
1410 bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]);
1411 bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]);
1412 bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]);
1413 bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]);
1414 bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]);
1415 bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]);
1416 bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]);
1417 bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]);
1418 bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]);
1419 bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]);
1420 bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]);
1421 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1422
1423 // stage 6
1424 stage++;
1425 bf0 = output;
1426 bf1 = step;
1427 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1428 bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
1429 bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
1430 bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
1431 bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
1432 bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
1433 bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
1434 bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
1435 bf1[8] = bf0[8];
1436 bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
1437 bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
1438 bf1[11] = bf0[11];
1439 bf1[12] = bf0[12];
1440 bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
1441 bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
1442 bf1[15] = bf0[15];
1443 bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
1444 bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
1445 bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
1446 bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
1447 bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
1448 bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
1449 bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
1450 bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
1451 bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
1452 bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
1453 bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
1454 bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
1455 bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
1456 bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
1457 bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
1458 bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
1459 bf1[32] = bf0[32];
1460 bf1[33] = bf0[33];
1461 bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
1462 bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
1463 bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
1464 bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
1465 bf1[38] = bf0[38];
1466 bf1[39] = bf0[39];
1467 bf1[40] = bf0[40];
1468 bf1[41] = bf0[41];
1469 bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
1470 bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
1471 bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
1472 bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
1473 bf1[46] = bf0[46];
1474 bf1[47] = bf0[47];
1475 bf1[48] = bf0[48];
1476 bf1[49] = bf0[49];
1477 bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit);
1478 bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit);
1479 bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit);
1480 bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit);
1481 bf1[54] = bf0[54];
1482 bf1[55] = bf0[55];
1483 bf1[56] = bf0[56];
1484 bf1[57] = bf0[57];
1485 bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit);
1486 bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit);
1487 bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit);
1488 bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit);
1489 bf1[62] = bf0[62];
1490 bf1[63] = bf0[63];
1491 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1492
1493 // stage 7
1494 stage++;
1495 bf0 = step;
1496 bf1 = output;
1497 bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
1498 bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
1499 bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
1500 bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
1501 bf1[4] = bf0[4];
1502 bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1503 bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1504 bf1[7] = bf0[7];
1505 bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
1506 bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
1507 bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
1508 bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
1509 bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
1510 bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
1511 bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
1512 bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
1513 bf1[16] = bf0[16];
1514 bf1[17] = bf0[17];
1515 bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
1516 bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
1517 bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
1518 bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
1519 bf1[22] = bf0[22];
1520 bf1[23] = bf0[23];
1521 bf1[24] = bf0[24];
1522 bf1[25] = bf0[25];
1523 bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
1524 bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
1525 bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
1526 bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
1527 bf1[30] = bf0[30];
1528 bf1[31] = bf0[31];
1529 bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]);
1530 bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]);
1531 bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]);
1532 bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]);
1533 bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]);
1534 bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]);
1535 bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]);
1536 bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]);
1537 bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]);
1538 bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]);
1539 bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]);
1540 bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]);
1541 bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]);
1542 bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]);
1543 bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]);
1544 bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]);
1545 bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]);
1546 bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]);
1547 bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]);
1548 bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]);
1549 bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]);
1550 bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]);
1551 bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]);
1552 bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]);
1553 bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]);
1554 bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]);
1555 bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]);
1556 bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]);
1557 bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]);
1558 bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]);
1559 bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]);
1560 bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]);
1561 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1562
1563 // stage 8
1564 stage++;
1565 bf0 = output;
1566 bf1 = step;
1567 bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
1568 bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
1569 bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
1570 bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
1571 bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
1572 bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
1573 bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
1574 bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
1575 bf1[8] = bf0[8];
1576 bf1[9] = bf0[9];
1577 bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1578 bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1579 bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1580 bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1581 bf1[14] = bf0[14];
1582 bf1[15] = bf0[15];
1583 bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
1584 bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
1585 bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
1586 bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
1587 bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
1588 bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
1589 bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
1590 bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
1591 bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
1592 bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
1593 bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
1594 bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
1595 bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
1596 bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
1597 bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
1598 bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
1599 bf1[32] = bf0[32];
1600 bf1[33] = bf0[33];
1601 bf1[34] = bf0[34];
1602 bf1[35] = bf0[35];
1603 bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
1604 bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
1605 bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
1606 bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
1607 bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
1608 bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
1609 bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
1610 bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
1611 bf1[44] = bf0[44];
1612 bf1[45] = bf0[45];
1613 bf1[46] = bf0[46];
1614 bf1[47] = bf0[47];
1615 bf1[48] = bf0[48];
1616 bf1[49] = bf0[49];
1617 bf1[50] = bf0[50];
1618 bf1[51] = bf0[51];
1619 bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit);
1620 bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit);
1621 bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit);
1622 bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit);
1623 bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit);
1624 bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit);
1625 bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit);
1626 bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit);
1627 bf1[60] = bf0[60];
1628 bf1[61] = bf0[61];
1629 bf1[62] = bf0[62];
1630 bf1[63] = bf0[63];
1631 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1632
1633 // stage 9
1634 stage++;
1635 bf0 = step;
1636 bf1 = output;
1637 bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
1638 bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
1639 bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
1640 bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
1641 bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
1642 bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
1643 bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
1644 bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
1645 bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
1646 bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
1647 bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
1648 bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
1649 bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
1650 bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
1651 bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
1652 bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
1653 bf1[16] = bf0[16];
1654 bf1[17] = bf0[17];
1655 bf1[18] = bf0[18];
1656 bf1[19] = bf0[19];
1657 bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
1658 bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
1659 bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
1660 bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
1661 bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
1662 bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
1663 bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
1664 bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
1665 bf1[28] = bf0[28];
1666 bf1[29] = bf0[29];
1667 bf1[30] = bf0[30];
1668 bf1[31] = bf0[31];
1669 bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]);
1670 bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]);
1671 bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]);
1672 bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]);
1673 bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]);
1674 bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]);
1675 bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]);
1676 bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]);
1677 bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]);
1678 bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]);
1679 bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]);
1680 bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]);
1681 bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]);
1682 bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]);
1683 bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]);
1684 bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]);
1685 bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]);
1686 bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]);
1687 bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]);
1688 bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]);
1689 bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]);
1690 bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]);
1691 bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]);
1692 bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]);
1693 bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]);
1694 bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]);
1695 bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]);
1696 bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]);
1697 bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]);
1698 bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]);
1699 bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]);
1700 bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]);
1701 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1702
1703 // stage 10
1704 stage++;
1705 bf0 = output;
1706 bf1 = step;
1707 bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
1708 bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
1709 bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
1710 bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
1711 bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
1712 bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
1713 bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
1714 bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
1715 bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
1716 bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
1717 bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
1718 bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
1719 bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
1720 bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
1721 bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
1722 bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
1723 bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
1724 bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
1725 bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
1726 bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
1727 bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
1728 bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
1729 bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
1730 bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
1731 bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
1732 bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
1733 bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
1734 bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
1735 bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
1736 bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
1737 bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
1738 bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
1739 bf1[32] = bf0[32];
1740 bf1[33] = bf0[33];
1741 bf1[34] = bf0[34];
1742 bf1[35] = bf0[35];
1743 bf1[36] = bf0[36];
1744 bf1[37] = bf0[37];
1745 bf1[38] = bf0[38];
1746 bf1[39] = bf0[39];
1747 bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
1748 bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
1749 bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
1750 bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
1751 bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
1752 bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
1753 bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
1754 bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
1755 bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
1756 bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
1757 bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
1758 bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
1759 bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
1760 bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
1761 bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
1762 bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
1763 bf1[56] = bf0[56];
1764 bf1[57] = bf0[57];
1765 bf1[58] = bf0[58];
1766 bf1[59] = bf0[59];
1767 bf1[60] = bf0[60];
1768 bf1[61] = bf0[61];
1769 bf1[62] = bf0[62];
1770 bf1[63] = bf0[63];
1771 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1772
1773 // stage 11
1774 stage++;
1775 bf0 = step;
1776 bf1 = output;
1777 bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]);
1778 bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]);
1779 bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]);
1780 bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]);
1781 bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]);
1782 bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]);
1783 bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]);
1784 bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]);
1785 bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]);
1786 bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]);
1787 bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]);
1788 bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]);
1789 bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]);
1790 bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]);
1791 bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]);
1792 bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]);
1793 bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]);
1794 bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]);
1795 bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]);
1796 bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]);
1797 bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]);
1798 bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]);
1799 bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]);
1800 bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]);
1801 bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]);
1802 bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]);
1803 bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]);
1804 bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]);
1805 bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]);
1806 bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]);
1807 bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]);
1808 bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]);
1809 bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]);
1810 bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]);
1811 bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]);
1812 bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]);
1813 bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]);
1814 bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]);
1815 bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]);
1816 bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]);
1817 bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]);
1818 bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]);
1819 bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]);
1820 bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]);
1821 bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]);
1822 bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]);
1823 bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]);
1824 bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]);
1825 bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]);
1826 bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]);
1827 bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]);
1828 bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]);
1829 bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]);
1830 bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]);
1831 bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]);
1832 bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]);
1833 bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]);
1834 bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]);
1835 bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]);
1836 bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]);
1837 bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]);
1838 bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]);
1839 bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]);
1840 bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]);
1841 }
1842