1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdlib.h>
13 #include "av1/common/av1_inv_txfm1d.h"
14 #include "av1/common/av1_txfm.h"
15
16 // TODO(angiebird): Make 1-d txfm functions static
17 //
18
// 1-D 4-point inverse transform ("idct4", going by the function name).
//
//   input       - 4 coefficients; must not be the same pointer as output
//                 (asserted below).
//   output      - receives the 4 results and doubles as working storage
//                 between stages.
//   cos_bit     - selects the coefficient table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - per-stage range values; entries 1..3 are read and handed to
//                 av1_range_check_buf() / clamp_value().
void av1_idct4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                   const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 4;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t step[4];
  const int32_t *src;
  int32_t *dst;
  int8_t range;

  // stage 1: copy the input into output in the order 0, 2, 1, 3.
  range = stage_range[++stage];
  dst = output;
  dst[0] = input[0];
  dst[1] = input[2];
  dst[2] = input[1];
  dst[3] = input[3];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 2: half_btf() on the pairs (0,1) and (2,3), output -> step.
  range = stage_range[++stage];
  src = output;
  dst = step;
  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit);
  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit);
  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit);
  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 3: mirrored add/sub, step -> output. Element i is paired with
  // element (size - 1 - i); the sum goes to the low index and the difference
  // to the high index, each passed through clamp_value().
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < size / 2; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[size - 1 - i];
    dst[i] = clamp_value(a + b, range);
    dst[size - 1 - i] = clamp_value(a - b, range);
  }
}
59
// 1-D 8-point inverse transform ("idct8", going by the function name).
//
//   input       - 8 coefficients; must not be the same pointer as output
//                 (asserted below).
//   output      - receives the 8 results and doubles as working storage
//                 between stages.
//   cos_bit     - selects the coefficient table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - per-stage range values; entries 1..5 are read and handed to
//                 av1_range_check_buf() / clamp_value().
//
// The stages ping-pong between `output` and the local `step` buffer, so the
// order of writes inside a stage (from stage 2 on) does not matter.
void av1_idct8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                   const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 8;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t step[8];
  const int32_t *src;
  int32_t *dst;
  int8_t range;

  // stage 1: copy the input into output in the order 0,4,2,6,1,5,3,7.
  range = stage_range[++stage];
  dst = output;
  dst[0] = input[0];
  dst[1] = input[4];
  dst[2] = input[2];
  dst[3] = input[6];
  dst[4] = input[1];
  dst[5] = input[5];
  dst[6] = input[3];
  dst[7] = input[7];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 2: elements 0..3 pass through; 4..7 go through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 4; ++i) dst[i] = src[i];
  dst[4] = half_btf(cospi[56], src[4], -cospi[8], src[7], cos_bit);
  dst[5] = half_btf(cospi[24], src[5], -cospi[40], src[6], cos_bit);
  dst[6] = half_btf(cospi[40], src[5], cospi[24], src[6], cos_bit);
  dst[7] = half_btf(cospi[8], src[4], cospi[56], src[7], cos_bit);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 3: half_btf() on 0..3, clamped add/sub on the pairs (4,5), (6,7).
  range = stage_range[++stage];
  src = step;
  dst = output;
  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit);
  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit);
  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit);
  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit);
  dst[4] = clamp_value(src[4] + src[5], range);
  dst[5] = clamp_value(src[4] - src[5], range);
  dst[6] = clamp_value(-src[6] + src[7], range);
  dst[7] = clamp_value(src[6] + src[7], range);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 4: mirrored add/sub on 0..3; 4 and 7 pass through; 5 and 6 are
  // combined with half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  dst[0] = clamp_value(src[0] + src[3], range);
  dst[1] = clamp_value(src[1] + src[2], range);
  dst[2] = clamp_value(src[1] - src[2], range);
  dst[3] = clamp_value(src[0] - src[3], range);
  dst[4] = src[4];
  dst[5] = half_btf(-cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[6] = half_btf(cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[7] = src[7];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 5: mirrored add/sub over all 8 elements, step -> output. Element i
  // is paired with element (size - 1 - i).
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < size / 2; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[size - 1 - i];
    dst[i] = clamp_value(a + b, range);
    dst[size - 1 - i] = clamp_value(a - b, range);
  }
}
140
// 1-D 16-point inverse transform ("idct16", going by the function name).
//
//   input       - 16 coefficients; must not be the same pointer as output
//                 (asserted below).
//   output      - receives the 16 results and doubles as working storage
//                 between stages.
//   cos_bit     - selects the coefficient table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - per-stage range values; entries 1..7 are read and handed to
//                 av1_range_check_buf() / clamp_value().
//
// The stages ping-pong between `output` and the local `step` buffer, so the
// order of writes inside a stage (from stage 2 on) does not matter.
void av1_idct16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  assert(output != input);
  // Source index read for each destination slot in stage 1.
  static const int8_t input_order[16] = { 0, 8, 4, 12, 2, 10, 6, 14,
                                          1, 9, 5, 13, 3, 11, 7, 15 };
  const int32_t size = 16;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t step[16];
  const int32_t *src;
  int32_t *dst;
  int8_t range;

  // stage 1: reorder the input into output (ascending destination index).
  range = stage_range[++stage];
  dst = output;
  for (int32_t i = 0; i < size; ++i) dst[i] = input[input_order[i]];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 2: 0..7 pass through; 8..15 go through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 8; ++i) dst[i] = src[i];
  dst[8] = half_btf(cospi[60], src[8], -cospi[4], src[15], cos_bit);
  dst[9] = half_btf(cospi[28], src[9], -cospi[36], src[14], cos_bit);
  dst[10] = half_btf(cospi[44], src[10], -cospi[20], src[13], cos_bit);
  dst[11] = half_btf(cospi[12], src[11], -cospi[52], src[12], cos_bit);
  dst[12] = half_btf(cospi[52], src[11], cospi[12], src[12], cos_bit);
  dst[13] = half_btf(cospi[20], src[10], cospi[44], src[13], cos_bit);
  dst[14] = half_btf(cospi[36], src[9], cospi[28], src[14], cos_bit);
  dst[15] = half_btf(cospi[4], src[8], cospi[60], src[15], cos_bit);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 3: 0..3 pass through; 4..7 go through half_btf(); 8..15 are
  // clamped add/sub on adjacent pairs, handled four elements at a time.
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < 4; ++i) dst[i] = src[i];
  dst[4] = half_btf(cospi[56], src[4], -cospi[8], src[7], cos_bit);
  dst[5] = half_btf(cospi[24], src[5], -cospi[40], src[6], cos_bit);
  dst[6] = half_btf(cospi[40], src[5], cospi[24], src[6], cos_bit);
  dst[7] = half_btf(cospi[8], src[4], cospi[56], src[7], cos_bit);
  for (int32_t k = 8; k < size; k += 4) {
    dst[k] = clamp_value(src[k] + src[k + 1], range);
    dst[k + 1] = clamp_value(src[k] - src[k + 1], range);
    dst[k + 2] = clamp_value(-src[k + 2] + src[k + 3], range);
    dst[k + 3] = clamp_value(src[k + 2] + src[k + 3], range);
  }
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 4: half_btf() on 0..3; clamped add/sub on (4,5) and (6,7);
  // 8, 11, 12, 15 pass through; 9, 10, 13, 14 go through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit);
  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit);
  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit);
  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit);
  dst[4] = clamp_value(src[4] + src[5], range);
  dst[5] = clamp_value(src[4] - src[5], range);
  dst[6] = clamp_value(-src[6] + src[7], range);
  dst[7] = clamp_value(src[6] + src[7], range);
  dst[8] = src[8];
  dst[9] = half_btf(-cospi[16], src[9], cospi[48], src[14], cos_bit);
  dst[10] = half_btf(-cospi[48], src[10], -cospi[16], src[13], cos_bit);
  dst[11] = src[11];
  dst[12] = src[12];
  dst[13] = half_btf(-cospi[16], src[10], cospi[48], src[13], cos_bit);
  dst[14] = half_btf(cospi[48], src[9], cospi[16], src[14], cos_bit);
  dst[15] = src[15];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 5: mirrored add/sub on 0..3; 4 and 7 pass through while 5 and 6 go
  // through half_btf(); clamped add/sub inside 8..11 and 12..15.
  range = stage_range[++stage];
  src = step;
  dst = output;
  dst[0] = clamp_value(src[0] + src[3], range);
  dst[1] = clamp_value(src[1] + src[2], range);
  dst[2] = clamp_value(src[1] - src[2], range);
  dst[3] = clamp_value(src[0] - src[3], range);
  dst[4] = src[4];
  dst[5] = half_btf(-cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[6] = half_btf(cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[7] = src[7];
  dst[8] = clamp_value(src[8] + src[11], range);
  dst[9] = clamp_value(src[9] + src[10], range);
  dst[10] = clamp_value(src[9] - src[10], range);
  dst[11] = clamp_value(src[8] - src[11], range);
  dst[12] = clamp_value(-src[12] + src[15], range);
  dst[13] = clamp_value(-src[13] + src[14], range);
  dst[14] = clamp_value(src[13] + src[14], range);
  dst[15] = clamp_value(src[12] + src[15], range);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 6: mirrored add/sub on 0..7 (i paired with 7 - i); 8, 9, 14, 15
  // pass through; 10..13 go through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 4; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[7 - i];
    dst[i] = clamp_value(a + b, range);
    dst[7 - i] = clamp_value(a - b, range);
  }
  dst[8] = src[8];
  dst[9] = src[9];
  dst[10] = half_btf(-cospi[32], src[10], cospi[32], src[13], cos_bit);
  dst[11] = half_btf(-cospi[32], src[11], cospi[32], src[12], cos_bit);
  dst[12] = half_btf(cospi[32], src[11], cospi[32], src[12], cos_bit);
  dst[13] = half_btf(cospi[32], src[10], cospi[32], src[13], cos_bit);
  dst[14] = src[14];
  dst[15] = src[15];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 7: mirrored add/sub over all 16 elements, step -> output. Element i
  // is paired with element (size - 1 - i).
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < size / 2; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[size - 1 - i];
    dst[i] = clamp_value(a + b, range);
    dst[size - 1 - i] = clamp_value(a - b, range);
  }
}
305
// 1-D 32-point inverse transform ("idct32", going by the function name).
//
//   input       - 32 coefficients; must not be the same pointer as output
//                 (asserted below).
//   output      - receives the 32 results and doubles as working storage
//                 between stages.
//   cos_bit     - selects the coefficient table through cospi_arr() and is
//                 forwarded to every half_btf() call.
//   stage_range - per-stage range values; entries 1..9 are read and handed to
//                 av1_range_check_buf() / clamp_value().
//
// The stages ping-pong between `output` and the local `step` buffer, so the
// order of writes inside a stage (from stage 2 on) does not matter.
void av1_idct32_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  assert(output != input);
  // Source index read for each destination slot in stage 1.
  static const int8_t input_order[32] = { 0, 16, 8,  24, 4, 20, 12, 28,
                                          2, 18, 10, 26, 6, 22, 14, 30,
                                          1, 17, 9,  25, 5, 21, 13, 29,
                                          3, 19, 11, 27, 7, 23, 15, 31 };
  const int32_t size = 32;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t step[32];
  const int32_t *src;
  int32_t *dst;
  int8_t range;

  // stage 1: reorder the input into output (ascending destination index).
  range = stage_range[++stage];
  dst = output;
  for (int32_t i = 0; i < size; ++i) dst[i] = input[input_order[i]];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 2: 0..15 pass through; 16..31 go through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 16; ++i) dst[i] = src[i];
  dst[16] = half_btf(cospi[62], src[16], -cospi[2], src[31], cos_bit);
  dst[17] = half_btf(cospi[30], src[17], -cospi[34], src[30], cos_bit);
  dst[18] = half_btf(cospi[46], src[18], -cospi[18], src[29], cos_bit);
  dst[19] = half_btf(cospi[14], src[19], -cospi[50], src[28], cos_bit);
  dst[20] = half_btf(cospi[54], src[20], -cospi[10], src[27], cos_bit);
  dst[21] = half_btf(cospi[22], src[21], -cospi[42], src[26], cos_bit);
  dst[22] = half_btf(cospi[38], src[22], -cospi[26], src[25], cos_bit);
  dst[23] = half_btf(cospi[6], src[23], -cospi[58], src[24], cos_bit);
  dst[24] = half_btf(cospi[58], src[23], cospi[6], src[24], cos_bit);
  dst[25] = half_btf(cospi[26], src[22], cospi[38], src[25], cos_bit);
  dst[26] = half_btf(cospi[42], src[21], cospi[22], src[26], cos_bit);
  dst[27] = half_btf(cospi[10], src[20], cospi[54], src[27], cos_bit);
  dst[28] = half_btf(cospi[50], src[19], cospi[14], src[28], cos_bit);
  dst[29] = half_btf(cospi[18], src[18], cospi[46], src[29], cos_bit);
  dst[30] = half_btf(cospi[34], src[17], cospi[30], src[30], cos_bit);
  dst[31] = half_btf(cospi[2], src[16], cospi[62], src[31], cos_bit);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 3: 0..7 pass through; 8..15 go through half_btf(); 16..31 are
  // clamped add/sub on adjacent pairs, handled four elements at a time.
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < 8; ++i) dst[i] = src[i];
  dst[8] = half_btf(cospi[60], src[8], -cospi[4], src[15], cos_bit);
  dst[9] = half_btf(cospi[28], src[9], -cospi[36], src[14], cos_bit);
  dst[10] = half_btf(cospi[44], src[10], -cospi[20], src[13], cos_bit);
  dst[11] = half_btf(cospi[12], src[11], -cospi[52], src[12], cos_bit);
  dst[12] = half_btf(cospi[52], src[11], cospi[12], src[12], cos_bit);
  dst[13] = half_btf(cospi[20], src[10], cospi[44], src[13], cos_bit);
  dst[14] = half_btf(cospi[36], src[9], cospi[28], src[14], cos_bit);
  dst[15] = half_btf(cospi[4], src[8], cospi[60], src[15], cos_bit);
  for (int32_t k = 16; k < size; k += 4) {
    dst[k] = clamp_value(src[k] + src[k + 1], range);
    dst[k + 1] = clamp_value(src[k] - src[k + 1], range);
    dst[k + 2] = clamp_value(-src[k + 2] + src[k + 3], range);
    dst[k + 3] = clamp_value(src[k + 2] + src[k + 3], range);
  }
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 4: 0..3 pass through; 4..7 go through half_btf(); 8..15 are clamped
  // add/sub on adjacent pairs; in 16..31 every group of four keeps its outer
  // two elements and sends the inner two through half_btf().
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 4; ++i) dst[i] = src[i];
  dst[4] = half_btf(cospi[56], src[4], -cospi[8], src[7], cos_bit);
  dst[5] = half_btf(cospi[24], src[5], -cospi[40], src[6], cos_bit);
  dst[6] = half_btf(cospi[40], src[5], cospi[24], src[6], cos_bit);
  dst[7] = half_btf(cospi[8], src[4], cospi[56], src[7], cos_bit);
  for (int32_t k = 8; k < 16; k += 4) {
    dst[k] = clamp_value(src[k] + src[k + 1], range);
    dst[k + 1] = clamp_value(src[k] - src[k + 1], range);
    dst[k + 2] = clamp_value(-src[k + 2] + src[k + 3], range);
    dst[k + 3] = clamp_value(src[k + 2] + src[k + 3], range);
  }
  dst[16] = src[16];
  dst[17] = half_btf(-cospi[8], src[17], cospi[56], src[30], cos_bit);
  dst[18] = half_btf(-cospi[56], src[18], -cospi[8], src[29], cos_bit);
  dst[19] = src[19];
  dst[20] = src[20];
  dst[21] = half_btf(-cospi[40], src[21], cospi[24], src[26], cos_bit);
  dst[22] = half_btf(-cospi[24], src[22], -cospi[40], src[25], cos_bit);
  dst[23] = src[23];
  dst[24] = src[24];
  dst[25] = half_btf(-cospi[40], src[22], cospi[24], src[25], cos_bit);
  dst[26] = half_btf(cospi[24], src[21], cospi[40], src[26], cos_bit);
  dst[27] = src[27];
  dst[28] = src[28];
  dst[29] = half_btf(-cospi[8], src[18], cospi[56], src[29], cos_bit);
  dst[30] = half_btf(cospi[56], src[17], cospi[8], src[30], cos_bit);
  dst[31] = src[31];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 5: half_btf() on 0..3; clamped add/sub on (4,5) and (6,7);
  // 8, 11, 12, 15 pass through while 9, 10, 13, 14 go through half_btf();
  // clamped add/sub inside each group of four in 16..31.
  range = stage_range[++stage];
  src = step;
  dst = output;
  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit);
  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit);
  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit);
  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit);
  dst[4] = clamp_value(src[4] + src[5], range);
  dst[5] = clamp_value(src[4] - src[5], range);
  dst[6] = clamp_value(-src[6] + src[7], range);
  dst[7] = clamp_value(src[6] + src[7], range);
  dst[8] = src[8];
  dst[9] = half_btf(-cospi[16], src[9], cospi[48], src[14], cos_bit);
  dst[10] = half_btf(-cospi[48], src[10], -cospi[16], src[13], cos_bit);
  dst[11] = src[11];
  dst[12] = src[12];
  dst[13] = half_btf(-cospi[16], src[10], cospi[48], src[13], cos_bit);
  dst[14] = half_btf(cospi[48], src[9], cospi[16], src[14], cos_bit);
  dst[15] = src[15];
  dst[16] = clamp_value(src[16] + src[19], range);
  dst[17] = clamp_value(src[17] + src[18], range);
  dst[18] = clamp_value(src[17] - src[18], range);
  dst[19] = clamp_value(src[16] - src[19], range);
  dst[20] = clamp_value(-src[20] + src[23], range);
  dst[21] = clamp_value(-src[21] + src[22], range);
  dst[22] = clamp_value(src[21] + src[22], range);
  dst[23] = clamp_value(src[20] + src[23], range);
  dst[24] = clamp_value(src[24] + src[27], range);
  dst[25] = clamp_value(src[25] + src[26], range);
  dst[26] = clamp_value(src[25] - src[26], range);
  dst[27] = clamp_value(src[24] - src[27], range);
  dst[28] = clamp_value(-src[28] + src[31], range);
  dst[29] = clamp_value(-src[29] + src[30], range);
  dst[30] = clamp_value(src[29] + src[30], range);
  dst[31] = clamp_value(src[28] + src[31], range);
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 6: mirrored add/sub on 0..3; 4 and 7 pass through while 5 and 6 go
  // through half_btf(); clamped add/sub inside 8..11 and 12..15; in 16..31
  // the elements 18..21 and 26..29 go through half_btf(), the rest pass
  // through.
  range = stage_range[++stage];
  src = output;
  dst = step;
  dst[0] = clamp_value(src[0] + src[3], range);
  dst[1] = clamp_value(src[1] + src[2], range);
  dst[2] = clamp_value(src[1] - src[2], range);
  dst[3] = clamp_value(src[0] - src[3], range);
  dst[4] = src[4];
  dst[5] = half_btf(-cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[6] = half_btf(cospi[32], src[5], cospi[32], src[6], cos_bit);
  dst[7] = src[7];
  dst[8] = clamp_value(src[8] + src[11], range);
  dst[9] = clamp_value(src[9] + src[10], range);
  dst[10] = clamp_value(src[9] - src[10], range);
  dst[11] = clamp_value(src[8] - src[11], range);
  dst[12] = clamp_value(-src[12] + src[15], range);
  dst[13] = clamp_value(-src[13] + src[14], range);
  dst[14] = clamp_value(src[13] + src[14], range);
  dst[15] = clamp_value(src[12] + src[15], range);
  dst[16] = src[16];
  dst[17] = src[17];
  dst[18] = half_btf(-cospi[16], src[18], cospi[48], src[29], cos_bit);
  dst[19] = half_btf(-cospi[16], src[19], cospi[48], src[28], cos_bit);
  dst[20] = half_btf(-cospi[48], src[20], -cospi[16], src[27], cos_bit);
  dst[21] = half_btf(-cospi[48], src[21], -cospi[16], src[26], cos_bit);
  for (int32_t i = 22; i < 26; ++i) dst[i] = src[i];
  dst[26] = half_btf(-cospi[16], src[21], cospi[48], src[26], cos_bit);
  dst[27] = half_btf(-cospi[16], src[20], cospi[48], src[27], cos_bit);
  dst[28] = half_btf(cospi[48], src[19], cospi[16], src[28], cos_bit);
  dst[29] = half_btf(cospi[48], src[18], cospi[16], src[29], cos_bit);
  dst[30] = src[30];
  dst[31] = src[31];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 7: three mirrored 8-element groups are handled in one loop:
  //   0..7   : sum to the low index, difference to the high index;
  //   16..23 : same as 0..7;
  //   24..31 : (-low + high) to the low index, sum to the high index.
  // 8, 9, 14, 15 pass through and 10..13 go through half_btf().
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < 4; ++i) {
    dst[i] = clamp_value(src[i] + src[7 - i], range);
    dst[7 - i] = clamp_value(src[i] - src[7 - i], range);
    dst[16 + i] = clamp_value(src[16 + i] + src[23 - i], range);
    dst[23 - i] = clamp_value(src[16 + i] - src[23 - i], range);
    dst[24 + i] = clamp_value(-src[24 + i] + src[31 - i], range);
    dst[31 - i] = clamp_value(src[24 + i] + src[31 - i], range);
  }
  dst[8] = src[8];
  dst[9] = src[9];
  dst[10] = half_btf(-cospi[32], src[10], cospi[32], src[13], cos_bit);
  dst[11] = half_btf(-cospi[32], src[11], cospi[32], src[12], cos_bit);
  dst[12] = half_btf(cospi[32], src[11], cospi[32], src[12], cos_bit);
  dst[13] = half_btf(cospi[32], src[10], cospi[32], src[13], cos_bit);
  dst[14] = src[14];
  dst[15] = src[15];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 8: mirrored add/sub on 0..15 (i paired with 15 - i); 16..19 and
  // 28..31 pass through; 20..27 go through half_btf() with element (20 + i)
  // paired with element (27 - i).
  range = stage_range[++stage];
  src = output;
  dst = step;
  for (int32_t i = 0; i < 8; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[15 - i];
    dst[i] = clamp_value(a + b, range);
    dst[15 - i] = clamp_value(a - b, range);
  }
  for (int32_t i = 16; i < 20; ++i) dst[i] = src[i];
  for (int32_t i = 0; i < 4; ++i) {
    const int32_t a = src[20 + i];
    const int32_t b = src[27 - i];
    dst[20 + i] = half_btf(-cospi[32], a, cospi[32], b, cos_bit);
    dst[27 - i] = half_btf(cospi[32], a, cospi[32], b, cos_bit);
  }
  for (int32_t i = 28; i < size; ++i) dst[i] = src[i];
  av1_range_check_buf(stage, input, dst, size, range);

  // stage 9: mirrored add/sub over all 32 elements, step -> output. Element i
  // is paired with element (size - 1 - i).
  range = stage_range[++stage];
  src = step;
  dst = output;
  for (int32_t i = 0; i < size / 2; ++i) {
    const int32_t a = src[i];
    const int32_t b = src[size - 1 - i];
    dst[i] = clamp_value(a + b, range);
    dst[size - 1 - i] = clamp_value(a - b, range);
  }
}
658
// 1-D 4-point inverse transform built on the sinpi_arr() table ("iadst4",
// going by the function name).
//
//   input       - 4 coefficients. All four are read before anything is
//                 written to output.
//   output      - receives the 4 results.
//   cos_bit     - selects the sinpi_arr() table and is the shift passed to
//                 round_shift() for the final results.
//   stage_range - per-stage range values; entries 1..6 are read and handed to
//                 range_check_value(). Every entry except [2] is used with
//                 cos_bit added.
void av1_iadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  int bit = cos_bit;
  const int32_t *sinpi = sinpi_arr(bit);

  const int32_t in0 = input[0];
  const int32_t in1 = input[1];
  const int32_t in2 = input[2];
  const int32_t in3 = input[3];

  // All-zero input: write zeros and return without touching stage_range.
  if ((in0 | in1 | in2 | in3) == 0) {
    output[3] = 0;
    output[2] = 0;
    output[1] = 0;
    output[0] = 0;
    return;
  }

  assert(sinpi[1] + sinpi[2] == sinpi[4]);

  // stage 1: products of the inputs with table entries.
  const int32_t p0 = range_check_value(sinpi[1] * in0, stage_range[1] + bit);
  const int32_t p1 = range_check_value(sinpi[2] * in0, stage_range[1] + bit);
  const int32_t p2 = range_check_value(sinpi[3] * in1, stage_range[1] + bit);
  const int32_t p3 = range_check_value(sinpi[4] * in2, stage_range[1] + bit);
  const int32_t p4 = range_check_value(sinpi[1] * in2, stage_range[1] + bit);
  const int32_t p5 = range_check_value(sinpi[2] * in3, stage_range[1] + bit);
  const int32_t p6 = range_check_value(sinpi[4] * in3, stage_range[1] + bit);

  // stage 2
  // NOTICE: (in0 - in2) here may use one extra bit compared to the
  // opt_range_row/col specified in av1_gen_inv_stage_range()
  const int32_t mix = range_check_value((in0 - in2) + in3, stage_range[2]);

  // stage 3: start the two running sums; carry p2 forward as `mid`.
  int32_t acc0 = range_check_value(p0 + p3, stage_range[3] + bit);
  int32_t acc1 = range_check_value(p1 - p4, stage_range[3] + bit);
  const int32_t mid = range_check_value(p2, stage_range[3] + bit);
  const int32_t scaled_mix =
      range_check_value(sinpi[3] * mix, stage_range[3] + bit);

  // stage 4
  acc0 = range_check_value(acc0 + p5, stage_range[4] + bit);
  acc1 = range_check_value(acc1 - p6, stage_range[4] + bit);

  // stage 5
  const int32_t y0 = range_check_value(acc0 + mid, stage_range[5] + bit);
  const int32_t y1 = range_check_value(acc1 + mid, stage_range[5] + bit);
  const int32_t y2 = range_check_value(scaled_mix, stage_range[5] + bit);
  int32_t y3 = range_check_value(acc0 + acc1, stage_range[5] + bit);

  // stage 6
  y3 = range_check_value(y3 - mid, stage_range[6] + bit);

  // The intermediates were checked against (stage_range + bit); round_shift()
  // by `bit` produces the final outputs.
  output[0] = round_shift(y0, bit);
  output[1] = round_shift(y1, bit);
  output[2] = round_shift(y2, bit);
  output[3] = round_shift(y3, bit);
}
715
// 8-point inverse ADST (per the function name), written as a fixed 7-stage
// butterfly network that ping-pongs between `output` and a local scratch
// buffer.
//
//   input       - 8 coefficients; only read while building stage 1.
//   output      - receives the 8 results. It also serves as working storage
//                 for the odd-numbered stages, which is why it must not alias
//                 `input` (checked by the assert below).
//   cos_bit     - selects the cosine table through cospi_arr() and is passed
//                 on to every half_btf() call.
//   stage_range - per-stage range values; entries 1..6 are read. Each one is
//                 handed to clamp_value() in the add/sub stages and to
//                 av1_range_check_buf() at the end of its stage.
//
// half_btf(), clamp_value() and av1_range_check_buf() are defined elsewhere;
// NOTE(review): half_btf() is presumably a rounded two-term weighted sum --
// confirm against its definition.
void av1_iadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                    const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 8;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t *bf0, *bf1;
  int32_t step[8];

  // stage 0;

  // stage 1: reorder the input coefficients into `output`.
  stage++;
  bf1 = output;
  bf1[0] = input[7];
  bf1[1] = input[0];
  bf1[2] = input[5];
  bf1[3] = input[2];
  bf1[4] = input[3];
  bf1[5] = input[4];
  bf1[6] = input[1];
  bf1[7] = input[6];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 2: one half_btf pair per adjacent (even, odd) element pair.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit);
  bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit);
  bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit);
  bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit);
  bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 3: sum/difference of elements four apart.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
  bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
  bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
  bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 4: lower half passes through; upper half gets half_btf pairs.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 5: sum/difference of elements two apart, within each group of 4.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
  bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
  bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
  bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 6: cospi[32] half_btf pairs on elements (2,3) and (6,7); the rest
  // pass through.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 7: final reorder into `output`, negating every odd-indexed result.
  // No range check is issued after this stage.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = -bf0[4];
  bf1[2] = bf0[6];
  bf1[3] = -bf0[2];
  bf1[4] = bf0[3];
  bf1[5] = -bf0[7];
  bf1[6] = bf0[5];
  bf1[7] = -bf0[1];
}
824
// 16-point inverse ADST (per the function name), written as a fixed 9-stage
// butterfly network that ping-pongs between `output` and a local scratch
// buffer.
//
//   input       - 16 coefficients; only read while building stage 1.
//   output      - receives the 16 results. It also serves as working storage
//                 for the odd-numbered stages, which is why it must not alias
//                 `input` (checked by the assert below).
//   cos_bit     - selects the cosine table through cospi_arr() and is passed
//                 on to every half_btf() call.
//   stage_range - per-stage range values; entries 1..8 are read. Each one is
//                 handed to clamp_value() in the add/sub stages and to
//                 av1_range_check_buf() at the end of its stage.
//
// half_btf(), clamp_value() and av1_range_check_buf() are defined elsewhere;
// NOTE(review): half_btf() is presumably a rounded two-term weighted sum --
// confirm against its definition.
void av1_iadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit,
                     const int8_t *stage_range) {
  assert(output != input);
  const int32_t size = 16;
  const int32_t *cospi = cospi_arr(cos_bit);

  int32_t stage = 0;
  int32_t *bf0, *bf1;
  int32_t step[16];

  // stage 0;

  // stage 1: reorder the input coefficients into `output`.
  stage++;
  bf1 = output;
  bf1[0] = input[15];
  bf1[1] = input[0];
  bf1[2] = input[13];
  bf1[3] = input[2];
  bf1[4] = input[11];
  bf1[5] = input[4];
  bf1[6] = input[9];
  bf1[7] = input[6];
  bf1[8] = input[7];
  bf1[9] = input[8];
  bf1[10] = input[5];
  bf1[11] = input[10];
  bf1[12] = input[3];
  bf1[13] = input[12];
  bf1[14] = input[1];
  bf1[15] = input[14];
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 2: one half_btf pair per adjacent (even, odd) element pair.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit);
  bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit);
  bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit);
  bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit);
  bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit);
  bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit);
  bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit);
  bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit);
  bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit);
  bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 3: sum/difference of elements eight apart.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[8], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[9], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[10], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[11], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[12], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[13], stage_range[stage]);
  bf1[6] = clamp_value(bf0[6] + bf0[14], stage_range[stage]);
  bf1[7] = clamp_value(bf0[7] + bf0[15], stage_range[stage]);
  bf1[8] = clamp_value(bf0[0] - bf0[8], stage_range[stage]);
  bf1[9] = clamp_value(bf0[1] - bf0[9], stage_range[stage]);
  bf1[10] = clamp_value(bf0[2] - bf0[10], stage_range[stage]);
  bf1[11] = clamp_value(bf0[3] - bf0[11], stage_range[stage]);
  bf1[12] = clamp_value(bf0[4] - bf0[12], stage_range[stage]);
  bf1[13] = clamp_value(bf0[5] - bf0[13], stage_range[stage]);
  bf1[14] = clamp_value(bf0[6] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[7] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 4: lower half passes through; upper half gets half_btf pairs.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = bf0[6];
  bf1[7] = bf0[7];
  bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit);
  bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit);
  bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit);
  bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit);
  bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 5: sum/difference of elements four apart, within each group of 8.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[4], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[5], stage_range[stage]);
  bf1[2] = clamp_value(bf0[2] + bf0[6], stage_range[stage]);
  bf1[3] = clamp_value(bf0[3] + bf0[7], stage_range[stage]);
  bf1[4] = clamp_value(bf0[0] - bf0[4], stage_range[stage]);
  bf1[5] = clamp_value(bf0[1] - bf0[5], stage_range[stage]);
  bf1[6] = clamp_value(bf0[2] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[3] - bf0[7], stage_range[stage]);
  bf1[8] = clamp_value(bf0[8] + bf0[12], stage_range[stage]);
  bf1[9] = clamp_value(bf0[9] + bf0[13], stage_range[stage]);
  bf1[10] = clamp_value(bf0[10] + bf0[14], stage_range[stage]);
  bf1[11] = clamp_value(bf0[11] + bf0[15], stage_range[stage]);
  bf1[12] = clamp_value(bf0[8] - bf0[12], stage_range[stage]);
  bf1[13] = clamp_value(bf0[9] - bf0[13], stage_range[stage]);
  bf1[14] = clamp_value(bf0[10] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[11] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 6: in each group of 8, the first four pass through and the last
  // four get cospi[16]/cospi[48] half_btf pairs.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = bf0[2];
  bf1[3] = bf0[3];
  bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit);
  bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit);
  bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit);
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = bf0[10];
  bf1[11] = bf0[11];
  bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit);
  bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit);
  bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 7: sum/difference of elements two apart, within each group of 4.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = clamp_value(bf0[0] + bf0[2], stage_range[stage]);
  bf1[1] = clamp_value(bf0[1] + bf0[3], stage_range[stage]);
  bf1[2] = clamp_value(bf0[0] - bf0[2], stage_range[stage]);
  bf1[3] = clamp_value(bf0[1] - bf0[3], stage_range[stage]);
  bf1[4] = clamp_value(bf0[4] + bf0[6], stage_range[stage]);
  bf1[5] = clamp_value(bf0[5] + bf0[7], stage_range[stage]);
  bf1[6] = clamp_value(bf0[4] - bf0[6], stage_range[stage]);
  bf1[7] = clamp_value(bf0[5] - bf0[7], stage_range[stage]);
  bf1[8] = clamp_value(bf0[8] + bf0[10], stage_range[stage]);
  bf1[9] = clamp_value(bf0[9] + bf0[11], stage_range[stage]);
  bf1[10] = clamp_value(bf0[8] - bf0[10], stage_range[stage]);
  bf1[11] = clamp_value(bf0[9] - bf0[11], stage_range[stage]);
  bf1[12] = clamp_value(bf0[12] + bf0[14], stage_range[stage]);
  bf1[13] = clamp_value(bf0[13] + bf0[15], stage_range[stage]);
  bf1[14] = clamp_value(bf0[12] - bf0[14], stage_range[stage]);
  bf1[15] = clamp_value(bf0[13] - bf0[15], stage_range[stage]);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 8: in each group of 4, the first two pass through and the last two
  // get a cospi[32] half_btf pair.
  stage++;
  bf0 = output;
  bf1 = step;
  bf1[0] = bf0[0];
  bf1[1] = bf0[1];
  bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit);
  bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit);
  bf1[4] = bf0[4];
  bf1[5] = bf0[5];
  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit);
  bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit);
  bf1[8] = bf0[8];
  bf1[9] = bf0[9];
  bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit);
  bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit);
  bf1[12] = bf0[12];
  bf1[13] = bf0[13];
  bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit);
  bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit);
  av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);

  // stage 9: final reorder into `output`, negating every odd-indexed result.
  // No range check is issued after this stage.
  stage++;
  bf0 = step;
  bf1 = output;
  bf1[0] = bf0[0];
  bf1[1] = -bf0[8];
  bf1[2] = bf0[12];
  bf1[3] = -bf0[4];
  bf1[4] = bf0[6];
  bf1[5] = -bf0[14];
  bf1[6] = bf0[10];
  bf1[7] = -bf0[2];
  bf1[8] = bf0[3];
  bf1[9] = -bf0[11];
  bf1[10] = bf0[15];
  bf1[11] = -bf0[7];
  bf1[12] = bf0[5];
  bf1[13] = -bf0[13];
  bf1[14] = bf0[9];
  bf1[15] = -bf0[1];
}
1033
av1_iidentity4_c(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1034 void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
1035 const int8_t *stage_range) {
1036 (void)cos_bit;
1037 (void)stage_range;
1038 for (int i = 0; i < 4; ++i) {
1039 output[i] = round_shift((int64_t)NewSqrt2 * input[i], NewSqrt2Bits);
1040 }
1041 assert(stage_range[0] + NewSqrt2Bits <= 32);
1042 }
1043
// 8-point inverse identity transform: every coefficient is doubled. The
// product is formed in 64 bits and then converted back to int32_t.
//
//   input       - 8 coefficients.
//   output      - 8 results, output[i] = 2 * input[i].
//   cos_bit     - unused; present to match the shared 1-D transform signature.
//   stage_range - unused.
void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                      const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 8; ++i) {
    output[i] = (int32_t)((int64_t)input[i] * 2);
  }
}
1050
av1_iidentity16_c(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1051 void av1_iidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit,
1052 const int8_t *stage_range) {
1053 (void)cos_bit;
1054 (void)stage_range;
1055 for (int i = 0; i < 16; ++i)
1056 output[i] = round_shift((int64_t)NewSqrt2 * 2 * input[i], NewSqrt2Bits);
1057 assert(stage_range[0] + NewSqrt2Bits <= 32);
1058 }
1059
// 32-point inverse identity transform: every coefficient is multiplied by 4.
// The product is formed in 64 bits and then converted back to int32_t.
//
//   input       - 32 coefficients.
//   output      - 32 results, output[i] = 4 * input[i].
//   cos_bit     - unused; present to match the shared 1-D transform signature.
//   stage_range - unused.
void av1_iidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
  (void)cos_bit;
  (void)stage_range;
  for (int i = 0; i < 32; ++i) {
    output[i] = (int32_t)((int64_t)input[i] * 4);
  }
}
1066
av1_idct64_new(const int32_t * input,int32_t * output,int8_t cos_bit,const int8_t * stage_range)1067 void av1_idct64_new(const int32_t *input, int32_t *output, int8_t cos_bit,
1068 const int8_t *stage_range) {
1069 assert(output != input);
1070 const int32_t size = 64;
1071 const int32_t *cospi = cospi_arr(cos_bit);
1072
1073 int32_t stage = 0;
1074 int32_t *bf0, *bf1;
1075 int32_t step[64];
1076
1077 // stage 0;
1078
1079 // stage 1;
1080 stage++;
1081 bf1 = output;
1082 bf1[0] = input[0];
1083 bf1[1] = input[32];
1084 bf1[2] = input[16];
1085 bf1[3] = input[48];
1086 bf1[4] = input[8];
1087 bf1[5] = input[40];
1088 bf1[6] = input[24];
1089 bf1[7] = input[56];
1090 bf1[8] = input[4];
1091 bf1[9] = input[36];
1092 bf1[10] = input[20];
1093 bf1[11] = input[52];
1094 bf1[12] = input[12];
1095 bf1[13] = input[44];
1096 bf1[14] = input[28];
1097 bf1[15] = input[60];
1098 bf1[16] = input[2];
1099 bf1[17] = input[34];
1100 bf1[18] = input[18];
1101 bf1[19] = input[50];
1102 bf1[20] = input[10];
1103 bf1[21] = input[42];
1104 bf1[22] = input[26];
1105 bf1[23] = input[58];
1106 bf1[24] = input[6];
1107 bf1[25] = input[38];
1108 bf1[26] = input[22];
1109 bf1[27] = input[54];
1110 bf1[28] = input[14];
1111 bf1[29] = input[46];
1112 bf1[30] = input[30];
1113 bf1[31] = input[62];
1114 bf1[32] = input[1];
1115 bf1[33] = input[33];
1116 bf1[34] = input[17];
1117 bf1[35] = input[49];
1118 bf1[36] = input[9];
1119 bf1[37] = input[41];
1120 bf1[38] = input[25];
1121 bf1[39] = input[57];
1122 bf1[40] = input[5];
1123 bf1[41] = input[37];
1124 bf1[42] = input[21];
1125 bf1[43] = input[53];
1126 bf1[44] = input[13];
1127 bf1[45] = input[45];
1128 bf1[46] = input[29];
1129 bf1[47] = input[61];
1130 bf1[48] = input[3];
1131 bf1[49] = input[35];
1132 bf1[50] = input[19];
1133 bf1[51] = input[51];
1134 bf1[52] = input[11];
1135 bf1[53] = input[43];
1136 bf1[54] = input[27];
1137 bf1[55] = input[59];
1138 bf1[56] = input[7];
1139 bf1[57] = input[39];
1140 bf1[58] = input[23];
1141 bf1[59] = input[55];
1142 bf1[60] = input[15];
1143 bf1[61] = input[47];
1144 bf1[62] = input[31];
1145 bf1[63] = input[63];
1146 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1147
1148 // stage 2
1149 stage++;
1150 bf0 = output;
1151 bf1 = step;
1152 bf1[0] = bf0[0];
1153 bf1[1] = bf0[1];
1154 bf1[2] = bf0[2];
1155 bf1[3] = bf0[3];
1156 bf1[4] = bf0[4];
1157 bf1[5] = bf0[5];
1158 bf1[6] = bf0[6];
1159 bf1[7] = bf0[7];
1160 bf1[8] = bf0[8];
1161 bf1[9] = bf0[9];
1162 bf1[10] = bf0[10];
1163 bf1[11] = bf0[11];
1164 bf1[12] = bf0[12];
1165 bf1[13] = bf0[13];
1166 bf1[14] = bf0[14];
1167 bf1[15] = bf0[15];
1168 bf1[16] = bf0[16];
1169 bf1[17] = bf0[17];
1170 bf1[18] = bf0[18];
1171 bf1[19] = bf0[19];
1172 bf1[20] = bf0[20];
1173 bf1[21] = bf0[21];
1174 bf1[22] = bf0[22];
1175 bf1[23] = bf0[23];
1176 bf1[24] = bf0[24];
1177 bf1[25] = bf0[25];
1178 bf1[26] = bf0[26];
1179 bf1[27] = bf0[27];
1180 bf1[28] = bf0[28];
1181 bf1[29] = bf0[29];
1182 bf1[30] = bf0[30];
1183 bf1[31] = bf0[31];
1184 bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit);
1185 bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit);
1186 bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit);
1187 bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit);
1188 bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit);
1189 bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit);
1190 bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit);
1191 bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit);
1192 bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit);
1193 bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit);
1194 bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit);
1195 bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit);
1196 bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit);
1197 bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit);
1198 bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit);
1199 bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit);
1200 bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit);
1201 bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit);
1202 bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit);
1203 bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit);
1204 bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit);
1205 bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit);
1206 bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit);
1207 bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit);
1208 bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit);
1209 bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit);
1210 bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit);
1211 bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit);
1212 bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit);
1213 bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit);
1214 bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit);
1215 bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit);
1216 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1217
1218 // stage 3
1219 stage++;
1220 bf0 = step;
1221 bf1 = output;
1222 bf1[0] = bf0[0];
1223 bf1[1] = bf0[1];
1224 bf1[2] = bf0[2];
1225 bf1[3] = bf0[3];
1226 bf1[4] = bf0[4];
1227 bf1[5] = bf0[5];
1228 bf1[6] = bf0[6];
1229 bf1[7] = bf0[7];
1230 bf1[8] = bf0[8];
1231 bf1[9] = bf0[9];
1232 bf1[10] = bf0[10];
1233 bf1[11] = bf0[11];
1234 bf1[12] = bf0[12];
1235 bf1[13] = bf0[13];
1236 bf1[14] = bf0[14];
1237 bf1[15] = bf0[15];
1238 bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit);
1239 bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit);
1240 bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit);
1241 bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit);
1242 bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit);
1243 bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit);
1244 bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit);
1245 bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit);
1246 bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit);
1247 bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit);
1248 bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit);
1249 bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit);
1250 bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit);
1251 bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit);
1252 bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit);
1253 bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit);
1254 bf1[32] = clamp_value(bf0[32] + bf0[33], stage_range[stage]);
1255 bf1[33] = clamp_value(bf0[32] - bf0[33], stage_range[stage]);
1256 bf1[34] = clamp_value(-bf0[34] + bf0[35], stage_range[stage]);
1257 bf1[35] = clamp_value(bf0[34] + bf0[35], stage_range[stage]);
1258 bf1[36] = clamp_value(bf0[36] + bf0[37], stage_range[stage]);
1259 bf1[37] = clamp_value(bf0[36] - bf0[37], stage_range[stage]);
1260 bf1[38] = clamp_value(-bf0[38] + bf0[39], stage_range[stage]);
1261 bf1[39] = clamp_value(bf0[38] + bf0[39], stage_range[stage]);
1262 bf1[40] = clamp_value(bf0[40] + bf0[41], stage_range[stage]);
1263 bf1[41] = clamp_value(bf0[40] - bf0[41], stage_range[stage]);
1264 bf1[42] = clamp_value(-bf0[42] + bf0[43], stage_range[stage]);
1265 bf1[43] = clamp_value(bf0[42] + bf0[43], stage_range[stage]);
1266 bf1[44] = clamp_value(bf0[44] + bf0[45], stage_range[stage]);
1267 bf1[45] = clamp_value(bf0[44] - bf0[45], stage_range[stage]);
1268 bf1[46] = clamp_value(-bf0[46] + bf0[47], stage_range[stage]);
1269 bf1[47] = clamp_value(bf0[46] + bf0[47], stage_range[stage]);
1270 bf1[48] = clamp_value(bf0[48] + bf0[49], stage_range[stage]);
1271 bf1[49] = clamp_value(bf0[48] - bf0[49], stage_range[stage]);
1272 bf1[50] = clamp_value(-bf0[50] + bf0[51], stage_range[stage]);
1273 bf1[51] = clamp_value(bf0[50] + bf0[51], stage_range[stage]);
1274 bf1[52] = clamp_value(bf0[52] + bf0[53], stage_range[stage]);
1275 bf1[53] = clamp_value(bf0[52] - bf0[53], stage_range[stage]);
1276 bf1[54] = clamp_value(-bf0[54] + bf0[55], stage_range[stage]);
1277 bf1[55] = clamp_value(bf0[54] + bf0[55], stage_range[stage]);
1278 bf1[56] = clamp_value(bf0[56] + bf0[57], stage_range[stage]);
1279 bf1[57] = clamp_value(bf0[56] - bf0[57], stage_range[stage]);
1280 bf1[58] = clamp_value(-bf0[58] + bf0[59], stage_range[stage]);
1281 bf1[59] = clamp_value(bf0[58] + bf0[59], stage_range[stage]);
1282 bf1[60] = clamp_value(bf0[60] + bf0[61], stage_range[stage]);
1283 bf1[61] = clamp_value(bf0[60] - bf0[61], stage_range[stage]);
1284 bf1[62] = clamp_value(-bf0[62] + bf0[63], stage_range[stage]);
1285 bf1[63] = clamp_value(bf0[62] + bf0[63], stage_range[stage]);
1286 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1287
1288 // stage 4
1289 stage++;
1290 bf0 = output;
1291 bf1 = step;
1292 bf1[0] = bf0[0];
1293 bf1[1] = bf0[1];
1294 bf1[2] = bf0[2];
1295 bf1[3] = bf0[3];
1296 bf1[4] = bf0[4];
1297 bf1[5] = bf0[5];
1298 bf1[6] = bf0[6];
1299 bf1[7] = bf0[7];
1300 bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit);
1301 bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit);
1302 bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit);
1303 bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit);
1304 bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit);
1305 bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit);
1306 bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit);
1307 bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit);
1308 bf1[16] = clamp_value(bf0[16] + bf0[17], stage_range[stage]);
1309 bf1[17] = clamp_value(bf0[16] - bf0[17], stage_range[stage]);
1310 bf1[18] = clamp_value(-bf0[18] + bf0[19], stage_range[stage]);
1311 bf1[19] = clamp_value(bf0[18] + bf0[19], stage_range[stage]);
1312 bf1[20] = clamp_value(bf0[20] + bf0[21], stage_range[stage]);
1313 bf1[21] = clamp_value(bf0[20] - bf0[21], stage_range[stage]);
1314 bf1[22] = clamp_value(-bf0[22] + bf0[23], stage_range[stage]);
1315 bf1[23] = clamp_value(bf0[22] + bf0[23], stage_range[stage]);
1316 bf1[24] = clamp_value(bf0[24] + bf0[25], stage_range[stage]);
1317 bf1[25] = clamp_value(bf0[24] - bf0[25], stage_range[stage]);
1318 bf1[26] = clamp_value(-bf0[26] + bf0[27], stage_range[stage]);
1319 bf1[27] = clamp_value(bf0[26] + bf0[27], stage_range[stage]);
1320 bf1[28] = clamp_value(bf0[28] + bf0[29], stage_range[stage]);
1321 bf1[29] = clamp_value(bf0[28] - bf0[29], stage_range[stage]);
1322 bf1[30] = clamp_value(-bf0[30] + bf0[31], stage_range[stage]);
1323 bf1[31] = clamp_value(bf0[30] + bf0[31], stage_range[stage]);
1324 bf1[32] = bf0[32];
1325 bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit);
1326 bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit);
1327 bf1[35] = bf0[35];
1328 bf1[36] = bf0[36];
1329 bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit);
1330 bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit);
1331 bf1[39] = bf0[39];
1332 bf1[40] = bf0[40];
1333 bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit);
1334 bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit);
1335 bf1[43] = bf0[43];
1336 bf1[44] = bf0[44];
1337 bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit);
1338 bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit);
1339 bf1[47] = bf0[47];
1340 bf1[48] = bf0[48];
1341 bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit);
1342 bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit);
1343 bf1[51] = bf0[51];
1344 bf1[52] = bf0[52];
1345 bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit);
1346 bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit);
1347 bf1[55] = bf0[55];
1348 bf1[56] = bf0[56];
1349 bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit);
1350 bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit);
1351 bf1[59] = bf0[59];
1352 bf1[60] = bf0[60];
1353 bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit);
1354 bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit);
1355 bf1[63] = bf0[63];
1356 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1357
1358 // stage 5
1359 stage++;
1360 bf0 = step;
1361 bf1 = output;
1362 bf1[0] = bf0[0];
1363 bf1[1] = bf0[1];
1364 bf1[2] = bf0[2];
1365 bf1[3] = bf0[3];
1366 bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit);
1367 bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit);
1368 bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit);
1369 bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit);
1370 bf1[8] = clamp_value(bf0[8] + bf0[9], stage_range[stage]);
1371 bf1[9] = clamp_value(bf0[8] - bf0[9], stage_range[stage]);
1372 bf1[10] = clamp_value(-bf0[10] + bf0[11], stage_range[stage]);
1373 bf1[11] = clamp_value(bf0[10] + bf0[11], stage_range[stage]);
1374 bf1[12] = clamp_value(bf0[12] + bf0[13], stage_range[stage]);
1375 bf1[13] = clamp_value(bf0[12] - bf0[13], stage_range[stage]);
1376 bf1[14] = clamp_value(-bf0[14] + bf0[15], stage_range[stage]);
1377 bf1[15] = clamp_value(bf0[14] + bf0[15], stage_range[stage]);
1378 bf1[16] = bf0[16];
1379 bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit);
1380 bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit);
1381 bf1[19] = bf0[19];
1382 bf1[20] = bf0[20];
1383 bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit);
1384 bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit);
1385 bf1[23] = bf0[23];
1386 bf1[24] = bf0[24];
1387 bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit);
1388 bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit);
1389 bf1[27] = bf0[27];
1390 bf1[28] = bf0[28];
1391 bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit);
1392 bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit);
1393 bf1[31] = bf0[31];
1394 bf1[32] = clamp_value(bf0[32] + bf0[35], stage_range[stage]);
1395 bf1[33] = clamp_value(bf0[33] + bf0[34], stage_range[stage]);
1396 bf1[34] = clamp_value(bf0[33] - bf0[34], stage_range[stage]);
1397 bf1[35] = clamp_value(bf0[32] - bf0[35], stage_range[stage]);
1398 bf1[36] = clamp_value(-bf0[36] + bf0[39], stage_range[stage]);
1399 bf1[37] = clamp_value(-bf0[37] + bf0[38], stage_range[stage]);
1400 bf1[38] = clamp_value(bf0[37] + bf0[38], stage_range[stage]);
1401 bf1[39] = clamp_value(bf0[36] + bf0[39], stage_range[stage]);
1402 bf1[40] = clamp_value(bf0[40] + bf0[43], stage_range[stage]);
1403 bf1[41] = clamp_value(bf0[41] + bf0[42], stage_range[stage]);
1404 bf1[42] = clamp_value(bf0[41] - bf0[42], stage_range[stage]);
1405 bf1[43] = clamp_value(bf0[40] - bf0[43], stage_range[stage]);
1406 bf1[44] = clamp_value(-bf0[44] + bf0[47], stage_range[stage]);
1407 bf1[45] = clamp_value(-bf0[45] + bf0[46], stage_range[stage]);
1408 bf1[46] = clamp_value(bf0[45] + bf0[46], stage_range[stage]);
1409 bf1[47] = clamp_value(bf0[44] + bf0[47], stage_range[stage]);
1410 bf1[48] = clamp_value(bf0[48] + bf0[51], stage_range[stage]);
1411 bf1[49] = clamp_value(bf0[49] + bf0[50], stage_range[stage]);
1412 bf1[50] = clamp_value(bf0[49] - bf0[50], stage_range[stage]);
1413 bf1[51] = clamp_value(bf0[48] - bf0[51], stage_range[stage]);
1414 bf1[52] = clamp_value(-bf0[52] + bf0[55], stage_range[stage]);
1415 bf1[53] = clamp_value(-bf0[53] + bf0[54], stage_range[stage]);
1416 bf1[54] = clamp_value(bf0[53] + bf0[54], stage_range[stage]);
1417 bf1[55] = clamp_value(bf0[52] + bf0[55], stage_range[stage]);
1418 bf1[56] = clamp_value(bf0[56] + bf0[59], stage_range[stage]);
1419 bf1[57] = clamp_value(bf0[57] + bf0[58], stage_range[stage]);
1420 bf1[58] = clamp_value(bf0[57] - bf0[58], stage_range[stage]);
1421 bf1[59] = clamp_value(bf0[56] - bf0[59], stage_range[stage]);
1422 bf1[60] = clamp_value(-bf0[60] + bf0[63], stage_range[stage]);
1423 bf1[61] = clamp_value(-bf0[61] + bf0[62], stage_range[stage]);
1424 bf1[62] = clamp_value(bf0[61] + bf0[62], stage_range[stage]);
1425 bf1[63] = clamp_value(bf0[60] + bf0[63], stage_range[stage]);
1426 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1427
1428 // stage 6
1429 stage++;
1430 bf0 = output;
1431 bf1 = step;
1432 bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit);
1433 bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit);
1434 bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit);
1435 bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit);
1436 bf1[4] = clamp_value(bf0[4] + bf0[5], stage_range[stage]);
1437 bf1[5] = clamp_value(bf0[4] - bf0[5], stage_range[stage]);
1438 bf1[6] = clamp_value(-bf0[6] + bf0[7], stage_range[stage]);
1439 bf1[7] = clamp_value(bf0[6] + bf0[7], stage_range[stage]);
1440 bf1[8] = bf0[8];
1441 bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit);
1442 bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit);
1443 bf1[11] = bf0[11];
1444 bf1[12] = bf0[12];
1445 bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit);
1446 bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit);
1447 bf1[15] = bf0[15];
1448 bf1[16] = clamp_value(bf0[16] + bf0[19], stage_range[stage]);
1449 bf1[17] = clamp_value(bf0[17] + bf0[18], stage_range[stage]);
1450 bf1[18] = clamp_value(bf0[17] - bf0[18], stage_range[stage]);
1451 bf1[19] = clamp_value(bf0[16] - bf0[19], stage_range[stage]);
1452 bf1[20] = clamp_value(-bf0[20] + bf0[23], stage_range[stage]);
1453 bf1[21] = clamp_value(-bf0[21] + bf0[22], stage_range[stage]);
1454 bf1[22] = clamp_value(bf0[21] + bf0[22], stage_range[stage]);
1455 bf1[23] = clamp_value(bf0[20] + bf0[23], stage_range[stage]);
1456 bf1[24] = clamp_value(bf0[24] + bf0[27], stage_range[stage]);
1457 bf1[25] = clamp_value(bf0[25] + bf0[26], stage_range[stage]);
1458 bf1[26] = clamp_value(bf0[25] - bf0[26], stage_range[stage]);
1459 bf1[27] = clamp_value(bf0[24] - bf0[27], stage_range[stage]);
1460 bf1[28] = clamp_value(-bf0[28] + bf0[31], stage_range[stage]);
1461 bf1[29] = clamp_value(-bf0[29] + bf0[30], stage_range[stage]);
1462 bf1[30] = clamp_value(bf0[29] + bf0[30], stage_range[stage]);
1463 bf1[31] = clamp_value(bf0[28] + bf0[31], stage_range[stage]);
1464 bf1[32] = bf0[32];
1465 bf1[33] = bf0[33];
1466 bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit);
1467 bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit);
1468 bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit);
1469 bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit);
1470 bf1[38] = bf0[38];
1471 bf1[39] = bf0[39];
1472 bf1[40] = bf0[40];
1473 bf1[41] = bf0[41];
1474 bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit);
1475 bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit);
1476 bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit);
1477 bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit);
1478 bf1[46] = bf0[46];
1479 bf1[47] = bf0[47];
1480 bf1[48] = bf0[48];
1481 bf1[49] = bf0[49];
1482 bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit);
1483 bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit);
1484 bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit);
1485 bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit);
1486 bf1[54] = bf0[54];
1487 bf1[55] = bf0[55];
1488 bf1[56] = bf0[56];
1489 bf1[57] = bf0[57];
1490 bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit);
1491 bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit);
1492 bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit);
1493 bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit);
1494 bf1[62] = bf0[62];
1495 bf1[63] = bf0[63];
1496 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1497
1498 // stage 7
1499 stage++;
1500 bf0 = step;
1501 bf1 = output;
1502 bf1[0] = clamp_value(bf0[0] + bf0[3], stage_range[stage]);
1503 bf1[1] = clamp_value(bf0[1] + bf0[2], stage_range[stage]);
1504 bf1[2] = clamp_value(bf0[1] - bf0[2], stage_range[stage]);
1505 bf1[3] = clamp_value(bf0[0] - bf0[3], stage_range[stage]);
1506 bf1[4] = bf0[4];
1507 bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1508 bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit);
1509 bf1[7] = bf0[7];
1510 bf1[8] = clamp_value(bf0[8] + bf0[11], stage_range[stage]);
1511 bf1[9] = clamp_value(bf0[9] + bf0[10], stage_range[stage]);
1512 bf1[10] = clamp_value(bf0[9] - bf0[10], stage_range[stage]);
1513 bf1[11] = clamp_value(bf0[8] - bf0[11], stage_range[stage]);
1514 bf1[12] = clamp_value(-bf0[12] + bf0[15], stage_range[stage]);
1515 bf1[13] = clamp_value(-bf0[13] + bf0[14], stage_range[stage]);
1516 bf1[14] = clamp_value(bf0[13] + bf0[14], stage_range[stage]);
1517 bf1[15] = clamp_value(bf0[12] + bf0[15], stage_range[stage]);
1518 bf1[16] = bf0[16];
1519 bf1[17] = bf0[17];
1520 bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit);
1521 bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit);
1522 bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit);
1523 bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit);
1524 bf1[22] = bf0[22];
1525 bf1[23] = bf0[23];
1526 bf1[24] = bf0[24];
1527 bf1[25] = bf0[25];
1528 bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit);
1529 bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit);
1530 bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit);
1531 bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit);
1532 bf1[30] = bf0[30];
1533 bf1[31] = bf0[31];
1534 bf1[32] = clamp_value(bf0[32] + bf0[39], stage_range[stage]);
1535 bf1[33] = clamp_value(bf0[33] + bf0[38], stage_range[stage]);
1536 bf1[34] = clamp_value(bf0[34] + bf0[37], stage_range[stage]);
1537 bf1[35] = clamp_value(bf0[35] + bf0[36], stage_range[stage]);
1538 bf1[36] = clamp_value(bf0[35] - bf0[36], stage_range[stage]);
1539 bf1[37] = clamp_value(bf0[34] - bf0[37], stage_range[stage]);
1540 bf1[38] = clamp_value(bf0[33] - bf0[38], stage_range[stage]);
1541 bf1[39] = clamp_value(bf0[32] - bf0[39], stage_range[stage]);
1542 bf1[40] = clamp_value(-bf0[40] + bf0[47], stage_range[stage]);
1543 bf1[41] = clamp_value(-bf0[41] + bf0[46], stage_range[stage]);
1544 bf1[42] = clamp_value(-bf0[42] + bf0[45], stage_range[stage]);
1545 bf1[43] = clamp_value(-bf0[43] + bf0[44], stage_range[stage]);
1546 bf1[44] = clamp_value(bf0[43] + bf0[44], stage_range[stage]);
1547 bf1[45] = clamp_value(bf0[42] + bf0[45], stage_range[stage]);
1548 bf1[46] = clamp_value(bf0[41] + bf0[46], stage_range[stage]);
1549 bf1[47] = clamp_value(bf0[40] + bf0[47], stage_range[stage]);
1550 bf1[48] = clamp_value(bf0[48] + bf0[55], stage_range[stage]);
1551 bf1[49] = clamp_value(bf0[49] + bf0[54], stage_range[stage]);
1552 bf1[50] = clamp_value(bf0[50] + bf0[53], stage_range[stage]);
1553 bf1[51] = clamp_value(bf0[51] + bf0[52], stage_range[stage]);
1554 bf1[52] = clamp_value(bf0[51] - bf0[52], stage_range[stage]);
1555 bf1[53] = clamp_value(bf0[50] - bf0[53], stage_range[stage]);
1556 bf1[54] = clamp_value(bf0[49] - bf0[54], stage_range[stage]);
1557 bf1[55] = clamp_value(bf0[48] - bf0[55], stage_range[stage]);
1558 bf1[56] = clamp_value(-bf0[56] + bf0[63], stage_range[stage]);
1559 bf1[57] = clamp_value(-bf0[57] + bf0[62], stage_range[stage]);
1560 bf1[58] = clamp_value(-bf0[58] + bf0[61], stage_range[stage]);
1561 bf1[59] = clamp_value(-bf0[59] + bf0[60], stage_range[stage]);
1562 bf1[60] = clamp_value(bf0[59] + bf0[60], stage_range[stage]);
1563 bf1[61] = clamp_value(bf0[58] + bf0[61], stage_range[stage]);
1564 bf1[62] = clamp_value(bf0[57] + bf0[62], stage_range[stage]);
1565 bf1[63] = clamp_value(bf0[56] + bf0[63], stage_range[stage]);
1566 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1567
1568 // stage 8
1569 stage++;
1570 bf0 = output;
1571 bf1 = step;
1572 bf1[0] = clamp_value(bf0[0] + bf0[7], stage_range[stage]);
1573 bf1[1] = clamp_value(bf0[1] + bf0[6], stage_range[stage]);
1574 bf1[2] = clamp_value(bf0[2] + bf0[5], stage_range[stage]);
1575 bf1[3] = clamp_value(bf0[3] + bf0[4], stage_range[stage]);
1576 bf1[4] = clamp_value(bf0[3] - bf0[4], stage_range[stage]);
1577 bf1[5] = clamp_value(bf0[2] - bf0[5], stage_range[stage]);
1578 bf1[6] = clamp_value(bf0[1] - bf0[6], stage_range[stage]);
1579 bf1[7] = clamp_value(bf0[0] - bf0[7], stage_range[stage]);
1580 bf1[8] = bf0[8];
1581 bf1[9] = bf0[9];
1582 bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1583 bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1584 bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit);
1585 bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit);
1586 bf1[14] = bf0[14];
1587 bf1[15] = bf0[15];
1588 bf1[16] = clamp_value(bf0[16] + bf0[23], stage_range[stage]);
1589 bf1[17] = clamp_value(bf0[17] + bf0[22], stage_range[stage]);
1590 bf1[18] = clamp_value(bf0[18] + bf0[21], stage_range[stage]);
1591 bf1[19] = clamp_value(bf0[19] + bf0[20], stage_range[stage]);
1592 bf1[20] = clamp_value(bf0[19] - bf0[20], stage_range[stage]);
1593 bf1[21] = clamp_value(bf0[18] - bf0[21], stage_range[stage]);
1594 bf1[22] = clamp_value(bf0[17] - bf0[22], stage_range[stage]);
1595 bf1[23] = clamp_value(bf0[16] - bf0[23], stage_range[stage]);
1596 bf1[24] = clamp_value(-bf0[24] + bf0[31], stage_range[stage]);
1597 bf1[25] = clamp_value(-bf0[25] + bf0[30], stage_range[stage]);
1598 bf1[26] = clamp_value(-bf0[26] + bf0[29], stage_range[stage]);
1599 bf1[27] = clamp_value(-bf0[27] + bf0[28], stage_range[stage]);
1600 bf1[28] = clamp_value(bf0[27] + bf0[28], stage_range[stage]);
1601 bf1[29] = clamp_value(bf0[26] + bf0[29], stage_range[stage]);
1602 bf1[30] = clamp_value(bf0[25] + bf0[30], stage_range[stage]);
1603 bf1[31] = clamp_value(bf0[24] + bf0[31], stage_range[stage]);
1604 bf1[32] = bf0[32];
1605 bf1[33] = bf0[33];
1606 bf1[34] = bf0[34];
1607 bf1[35] = bf0[35];
1608 bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit);
1609 bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit);
1610 bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit);
1611 bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit);
1612 bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit);
1613 bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit);
1614 bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit);
1615 bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit);
1616 bf1[44] = bf0[44];
1617 bf1[45] = bf0[45];
1618 bf1[46] = bf0[46];
1619 bf1[47] = bf0[47];
1620 bf1[48] = bf0[48];
1621 bf1[49] = bf0[49];
1622 bf1[50] = bf0[50];
1623 bf1[51] = bf0[51];
1624 bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit);
1625 bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit);
1626 bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit);
1627 bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit);
1628 bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit);
1629 bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit);
1630 bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit);
1631 bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit);
1632 bf1[60] = bf0[60];
1633 bf1[61] = bf0[61];
1634 bf1[62] = bf0[62];
1635 bf1[63] = bf0[63];
1636 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1637
1638 // stage 9
1639 stage++;
1640 bf0 = step;
1641 bf1 = output;
1642 bf1[0] = clamp_value(bf0[0] + bf0[15], stage_range[stage]);
1643 bf1[1] = clamp_value(bf0[1] + bf0[14], stage_range[stage]);
1644 bf1[2] = clamp_value(bf0[2] + bf0[13], stage_range[stage]);
1645 bf1[3] = clamp_value(bf0[3] + bf0[12], stage_range[stage]);
1646 bf1[4] = clamp_value(bf0[4] + bf0[11], stage_range[stage]);
1647 bf1[5] = clamp_value(bf0[5] + bf0[10], stage_range[stage]);
1648 bf1[6] = clamp_value(bf0[6] + bf0[9], stage_range[stage]);
1649 bf1[7] = clamp_value(bf0[7] + bf0[8], stage_range[stage]);
1650 bf1[8] = clamp_value(bf0[7] - bf0[8], stage_range[stage]);
1651 bf1[9] = clamp_value(bf0[6] - bf0[9], stage_range[stage]);
1652 bf1[10] = clamp_value(bf0[5] - bf0[10], stage_range[stage]);
1653 bf1[11] = clamp_value(bf0[4] - bf0[11], stage_range[stage]);
1654 bf1[12] = clamp_value(bf0[3] - bf0[12], stage_range[stage]);
1655 bf1[13] = clamp_value(bf0[2] - bf0[13], stage_range[stage]);
1656 bf1[14] = clamp_value(bf0[1] - bf0[14], stage_range[stage]);
1657 bf1[15] = clamp_value(bf0[0] - bf0[15], stage_range[stage]);
1658 bf1[16] = bf0[16];
1659 bf1[17] = bf0[17];
1660 bf1[18] = bf0[18];
1661 bf1[19] = bf0[19];
1662 bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
1663 bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
1664 bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
1665 bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
1666 bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit);
1667 bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit);
1668 bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit);
1669 bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit);
1670 bf1[28] = bf0[28];
1671 bf1[29] = bf0[29];
1672 bf1[30] = bf0[30];
1673 bf1[31] = bf0[31];
1674 bf1[32] = clamp_value(bf0[32] + bf0[47], stage_range[stage]);
1675 bf1[33] = clamp_value(bf0[33] + bf0[46], stage_range[stage]);
1676 bf1[34] = clamp_value(bf0[34] + bf0[45], stage_range[stage]);
1677 bf1[35] = clamp_value(bf0[35] + bf0[44], stage_range[stage]);
1678 bf1[36] = clamp_value(bf0[36] + bf0[43], stage_range[stage]);
1679 bf1[37] = clamp_value(bf0[37] + bf0[42], stage_range[stage]);
1680 bf1[38] = clamp_value(bf0[38] + bf0[41], stage_range[stage]);
1681 bf1[39] = clamp_value(bf0[39] + bf0[40], stage_range[stage]);
1682 bf1[40] = clamp_value(bf0[39] - bf0[40], stage_range[stage]);
1683 bf1[41] = clamp_value(bf0[38] - bf0[41], stage_range[stage]);
1684 bf1[42] = clamp_value(bf0[37] - bf0[42], stage_range[stage]);
1685 bf1[43] = clamp_value(bf0[36] - bf0[43], stage_range[stage]);
1686 bf1[44] = clamp_value(bf0[35] - bf0[44], stage_range[stage]);
1687 bf1[45] = clamp_value(bf0[34] - bf0[45], stage_range[stage]);
1688 bf1[46] = clamp_value(bf0[33] - bf0[46], stage_range[stage]);
1689 bf1[47] = clamp_value(bf0[32] - bf0[47], stage_range[stage]);
1690 bf1[48] = clamp_value(-bf0[48] + bf0[63], stage_range[stage]);
1691 bf1[49] = clamp_value(-bf0[49] + bf0[62], stage_range[stage]);
1692 bf1[50] = clamp_value(-bf0[50] + bf0[61], stage_range[stage]);
1693 bf1[51] = clamp_value(-bf0[51] + bf0[60], stage_range[stage]);
1694 bf1[52] = clamp_value(-bf0[52] + bf0[59], stage_range[stage]);
1695 bf1[53] = clamp_value(-bf0[53] + bf0[58], stage_range[stage]);
1696 bf1[54] = clamp_value(-bf0[54] + bf0[57], stage_range[stage]);
1697 bf1[55] = clamp_value(-bf0[55] + bf0[56], stage_range[stage]);
1698 bf1[56] = clamp_value(bf0[55] + bf0[56], stage_range[stage]);
1699 bf1[57] = clamp_value(bf0[54] + bf0[57], stage_range[stage]);
1700 bf1[58] = clamp_value(bf0[53] + bf0[58], stage_range[stage]);
1701 bf1[59] = clamp_value(bf0[52] + bf0[59], stage_range[stage]);
1702 bf1[60] = clamp_value(bf0[51] + bf0[60], stage_range[stage]);
1703 bf1[61] = clamp_value(bf0[50] + bf0[61], stage_range[stage]);
1704 bf1[62] = clamp_value(bf0[49] + bf0[62], stage_range[stage]);
1705 bf1[63] = clamp_value(bf0[48] + bf0[63], stage_range[stage]);
1706 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1707
1708 // stage 10
1709 stage++;
1710 bf0 = output;
1711 bf1 = step;
1712 bf1[0] = clamp_value(bf0[0] + bf0[31], stage_range[stage]);
1713 bf1[1] = clamp_value(bf0[1] + bf0[30], stage_range[stage]);
1714 bf1[2] = clamp_value(bf0[2] + bf0[29], stage_range[stage]);
1715 bf1[3] = clamp_value(bf0[3] + bf0[28], stage_range[stage]);
1716 bf1[4] = clamp_value(bf0[4] + bf0[27], stage_range[stage]);
1717 bf1[5] = clamp_value(bf0[5] + bf0[26], stage_range[stage]);
1718 bf1[6] = clamp_value(bf0[6] + bf0[25], stage_range[stage]);
1719 bf1[7] = clamp_value(bf0[7] + bf0[24], stage_range[stage]);
1720 bf1[8] = clamp_value(bf0[8] + bf0[23], stage_range[stage]);
1721 bf1[9] = clamp_value(bf0[9] + bf0[22], stage_range[stage]);
1722 bf1[10] = clamp_value(bf0[10] + bf0[21], stage_range[stage]);
1723 bf1[11] = clamp_value(bf0[11] + bf0[20], stage_range[stage]);
1724 bf1[12] = clamp_value(bf0[12] + bf0[19], stage_range[stage]);
1725 bf1[13] = clamp_value(bf0[13] + bf0[18], stage_range[stage]);
1726 bf1[14] = clamp_value(bf0[14] + bf0[17], stage_range[stage]);
1727 bf1[15] = clamp_value(bf0[15] + bf0[16], stage_range[stage]);
1728 bf1[16] = clamp_value(bf0[15] - bf0[16], stage_range[stage]);
1729 bf1[17] = clamp_value(bf0[14] - bf0[17], stage_range[stage]);
1730 bf1[18] = clamp_value(bf0[13] - bf0[18], stage_range[stage]);
1731 bf1[19] = clamp_value(bf0[12] - bf0[19], stage_range[stage]);
1732 bf1[20] = clamp_value(bf0[11] - bf0[20], stage_range[stage]);
1733 bf1[21] = clamp_value(bf0[10] - bf0[21], stage_range[stage]);
1734 bf1[22] = clamp_value(bf0[9] - bf0[22], stage_range[stage]);
1735 bf1[23] = clamp_value(bf0[8] - bf0[23], stage_range[stage]);
1736 bf1[24] = clamp_value(bf0[7] - bf0[24], stage_range[stage]);
1737 bf1[25] = clamp_value(bf0[6] - bf0[25], stage_range[stage]);
1738 bf1[26] = clamp_value(bf0[5] - bf0[26], stage_range[stage]);
1739 bf1[27] = clamp_value(bf0[4] - bf0[27], stage_range[stage]);
1740 bf1[28] = clamp_value(bf0[3] - bf0[28], stage_range[stage]);
1741 bf1[29] = clamp_value(bf0[2] - bf0[29], stage_range[stage]);
1742 bf1[30] = clamp_value(bf0[1] - bf0[30], stage_range[stage]);
1743 bf1[31] = clamp_value(bf0[0] - bf0[31], stage_range[stage]);
1744 bf1[32] = bf0[32];
1745 bf1[33] = bf0[33];
1746 bf1[34] = bf0[34];
1747 bf1[35] = bf0[35];
1748 bf1[36] = bf0[36];
1749 bf1[37] = bf0[37];
1750 bf1[38] = bf0[38];
1751 bf1[39] = bf0[39];
1752 bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
1753 bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
1754 bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
1755 bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
1756 bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
1757 bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
1758 bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
1759 bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
1760 bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit);
1761 bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit);
1762 bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit);
1763 bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit);
1764 bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit);
1765 bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit);
1766 bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit);
1767 bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit);
1768 bf1[56] = bf0[56];
1769 bf1[57] = bf0[57];
1770 bf1[58] = bf0[58];
1771 bf1[59] = bf0[59];
1772 bf1[60] = bf0[60];
1773 bf1[61] = bf0[61];
1774 bf1[62] = bf0[62];
1775 bf1[63] = bf0[63];
1776 av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
1777
1778 // stage 11
1779 stage++;
1780 bf0 = step;
1781 bf1 = output;
1782 bf1[0] = clamp_value(bf0[0] + bf0[63], stage_range[stage]);
1783 bf1[1] = clamp_value(bf0[1] + bf0[62], stage_range[stage]);
1784 bf1[2] = clamp_value(bf0[2] + bf0[61], stage_range[stage]);
1785 bf1[3] = clamp_value(bf0[3] + bf0[60], stage_range[stage]);
1786 bf1[4] = clamp_value(bf0[4] + bf0[59], stage_range[stage]);
1787 bf1[5] = clamp_value(bf0[5] + bf0[58], stage_range[stage]);
1788 bf1[6] = clamp_value(bf0[6] + bf0[57], stage_range[stage]);
1789 bf1[7] = clamp_value(bf0[7] + bf0[56], stage_range[stage]);
1790 bf1[8] = clamp_value(bf0[8] + bf0[55], stage_range[stage]);
1791 bf1[9] = clamp_value(bf0[9] + bf0[54], stage_range[stage]);
1792 bf1[10] = clamp_value(bf0[10] + bf0[53], stage_range[stage]);
1793 bf1[11] = clamp_value(bf0[11] + bf0[52], stage_range[stage]);
1794 bf1[12] = clamp_value(bf0[12] + bf0[51], stage_range[stage]);
1795 bf1[13] = clamp_value(bf0[13] + bf0[50], stage_range[stage]);
1796 bf1[14] = clamp_value(bf0[14] + bf0[49], stage_range[stage]);
1797 bf1[15] = clamp_value(bf0[15] + bf0[48], stage_range[stage]);
1798 bf1[16] = clamp_value(bf0[16] + bf0[47], stage_range[stage]);
1799 bf1[17] = clamp_value(bf0[17] + bf0[46], stage_range[stage]);
1800 bf1[18] = clamp_value(bf0[18] + bf0[45], stage_range[stage]);
1801 bf1[19] = clamp_value(bf0[19] + bf0[44], stage_range[stage]);
1802 bf1[20] = clamp_value(bf0[20] + bf0[43], stage_range[stage]);
1803 bf1[21] = clamp_value(bf0[21] + bf0[42], stage_range[stage]);
1804 bf1[22] = clamp_value(bf0[22] + bf0[41], stage_range[stage]);
1805 bf1[23] = clamp_value(bf0[23] + bf0[40], stage_range[stage]);
1806 bf1[24] = clamp_value(bf0[24] + bf0[39], stage_range[stage]);
1807 bf1[25] = clamp_value(bf0[25] + bf0[38], stage_range[stage]);
1808 bf1[26] = clamp_value(bf0[26] + bf0[37], stage_range[stage]);
1809 bf1[27] = clamp_value(bf0[27] + bf0[36], stage_range[stage]);
1810 bf1[28] = clamp_value(bf0[28] + bf0[35], stage_range[stage]);
1811 bf1[29] = clamp_value(bf0[29] + bf0[34], stage_range[stage]);
1812 bf1[30] = clamp_value(bf0[30] + bf0[33], stage_range[stage]);
1813 bf1[31] = clamp_value(bf0[31] + bf0[32], stage_range[stage]);
1814 bf1[32] = clamp_value(bf0[31] - bf0[32], stage_range[stage]);
1815 bf1[33] = clamp_value(bf0[30] - bf0[33], stage_range[stage]);
1816 bf1[34] = clamp_value(bf0[29] - bf0[34], stage_range[stage]);
1817 bf1[35] = clamp_value(bf0[28] - bf0[35], stage_range[stage]);
1818 bf1[36] = clamp_value(bf0[27] - bf0[36], stage_range[stage]);
1819 bf1[37] = clamp_value(bf0[26] - bf0[37], stage_range[stage]);
1820 bf1[38] = clamp_value(bf0[25] - bf0[38], stage_range[stage]);
1821 bf1[39] = clamp_value(bf0[24] - bf0[39], stage_range[stage]);
1822 bf1[40] = clamp_value(bf0[23] - bf0[40], stage_range[stage]);
1823 bf1[41] = clamp_value(bf0[22] - bf0[41], stage_range[stage]);
1824 bf1[42] = clamp_value(bf0[21] - bf0[42], stage_range[stage]);
1825 bf1[43] = clamp_value(bf0[20] - bf0[43], stage_range[stage]);
1826 bf1[44] = clamp_value(bf0[19] - bf0[44], stage_range[stage]);
1827 bf1[45] = clamp_value(bf0[18] - bf0[45], stage_range[stage]);
1828 bf1[46] = clamp_value(bf0[17] - bf0[46], stage_range[stage]);
1829 bf1[47] = clamp_value(bf0[16] - bf0[47], stage_range[stage]);
1830 bf1[48] = clamp_value(bf0[15] - bf0[48], stage_range[stage]);
1831 bf1[49] = clamp_value(bf0[14] - bf0[49], stage_range[stage]);
1832 bf1[50] = clamp_value(bf0[13] - bf0[50], stage_range[stage]);
1833 bf1[51] = clamp_value(bf0[12] - bf0[51], stage_range[stage]);
1834 bf1[52] = clamp_value(bf0[11] - bf0[52], stage_range[stage]);
1835 bf1[53] = clamp_value(bf0[10] - bf0[53], stage_range[stage]);
1836 bf1[54] = clamp_value(bf0[9] - bf0[54], stage_range[stage]);
1837 bf1[55] = clamp_value(bf0[8] - bf0[55], stage_range[stage]);
1838 bf1[56] = clamp_value(bf0[7] - bf0[56], stage_range[stage]);
1839 bf1[57] = clamp_value(bf0[6] - bf0[57], stage_range[stage]);
1840 bf1[58] = clamp_value(bf0[5] - bf0[58], stage_range[stage]);
1841 bf1[59] = clamp_value(bf0[4] - bf0[59], stage_range[stage]);
1842 bf1[60] = clamp_value(bf0[3] - bf0[60], stage_range[stage]);
1843 bf1[61] = clamp_value(bf0[2] - bf0[61], stage_range[stage]);
1844 bf1[62] = clamp_value(bf0[1] - bf0[62], stage_range[stage]);
1845 bf1[63] = clamp_value(bf0[0] - bf0[63], stage_range[stage]);
1846 }
1847