1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_chroma_itrans_recon_16x16.c
22 *
23 * @brief
24 * Contains function definitions for 16x16 inverse transform and reconstruction
25 * of chroma interleaved data.
26 *
27 * @author
28 * 100470
29 *
30 * @par List of Functions:
31 * - ihevc_chroma_itrans_recon_16x16()
32 *
33 * @remarks
34 * None
35 *
36 *******************************************************************************
37 */
38
39 #include <stdio.h>
40 #include <string.h>
41 #include "ihevc_typedefs.h"
42 #include "ihevc_macros.h"
43 #include "ihevc_platform_macros.h"
44 #include "ihevc_defs.h"
45 #include "ihevc_trans_tables.h"
46 #include "ihevc_chroma_itrans_recon.h"
47 #include "ihevc_func_selector.h"
48 #include "ihevc_trans_macros.h"
49
/* All the functions work on one component (U or V) of interleaved data, depending upon the pointers passed to them */
51 /* Data visualization */
52 /* U V U V U V U V */
53 /* U V U V U V U V */
54 /* U V U V U V U V */
55 /* U V U V U V U V */
56 /* If the pointer points to first byte of above stream (U) , functions will operate on U component */
57 /* If the pointer points to second byte of above stream (V) , functions will operate on V component */
58
59
60 /**
61 *******************************************************************************
62 *
63 * @brief
64 * This function performs Inverse transform and reconstruction for 16x16
65 * input block
66 *
67 * @par Description:
68 * Performs inverse transform and adds the prediction data and clips output
69 * to 8 bit
70 *
71 * @param[in] pi2_src
72 * Input 16x16 coefficients
73 *
74 * @param[in] pi2_tmp
75 * Temporary 16x16 buffer for storing inverse transform
76 * 1st stage output
77 *
78 * @param[in] pu1_pred
79 * Prediction 16x16 block
80 *
81 * @param[out] pu1_dst
82 * Output 16x16 block
83 *
84 * @param[in] src_strd
85 * Input stride
86 *
87 * @param[in] pred_strd
88 * Prediction stride
89 *
90 * @param[in] dst_strd
91 * Output Stride
92 *
93 * @param[in] shift
94 * Output shift
95 *
96 * @param[in] zero_cols
97 * Zero columns in pi2_src
98 *
99 * @returns Void
100 *
101 * @remarks
102 * None
103 *
104 *******************************************************************************
105 */
106
107
ihevc_chroma_itrans_recon_16x16(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)108 void ihevc_chroma_itrans_recon_16x16(WORD16 *pi2_src,
109 WORD16 *pi2_tmp,
110 UWORD8 *pu1_pred,
111 UWORD8 *pu1_dst,
112 WORD32 src_strd,
113 WORD32 pred_strd,
114 WORD32 dst_strd,
115 WORD32 zero_cols,
116 WORD32 zero_rows)
117 {
118 WORD32 j, k;
119 WORD32 e[8], o[8];
120 WORD32 ee[4], eo[4];
121 WORD32 eee[2], eeo[2];
122 WORD32 add;
123 WORD32 shift;
124 WORD16 *pi2_tmp_orig;
125 WORD32 trans_size;
126 WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols;
127
128 trans_size = TRANS_SIZE_16;
129 pi2_tmp_orig = pi2_tmp;
130
131 if((zero_cols & 0xFFF0) == 0xFFF0)
132 row_limit_2nd_stage = 4;
133 else if((zero_cols & 0xFF00) == 0xFF00)
134 row_limit_2nd_stage = 8;
135 else
136 row_limit_2nd_stage = TRANS_SIZE_16;
137
138 if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */
139 {
140 /************************************************************************************************/
141 /**********************************START - IT_RECON_16x16****************************************/
142 /************************************************************************************************/
143
144 /* Inverse Transform 1st stage */
145 shift = IT_SHIFT_STAGE_1;
146 add = 1 << (shift - 1);
147
148 for(j = 0; j < row_limit_2nd_stage; j++)
149 {
150 /* Checking for Zero Cols */
151 if((zero_cols & 1) == 1)
152 {
153 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
154 }
155 else
156 {
157 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
158 for(k = 0; k < 8; k++)
159 {
160 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
161 + g_ai2_ihevc_trans_16[3][k]
162 * pi2_src[3 * src_strd];
163 }
164 for(k = 0; k < 4; k++)
165 {
166 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd];
167 }
168 eeo[0] = 0;
169 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
170 eeo[1] = 0;
171 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
172
173 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
174 for(k = 0; k < 2; k++)
175 {
176 ee[k] = eee[k] + eeo[k];
177 ee[k + 2] = eee[1 - k] - eeo[1 - k];
178 }
179 for(k = 0; k < 4; k++)
180 {
181 e[k] = ee[k] + eo[k];
182 e[k + 4] = ee[3 - k] - eo[3 - k];
183 }
184 for(k = 0; k < 8; k++)
185 {
186 pi2_tmp[k] =
187 CLIP_S16(((e[k] + o[k] + add) >> shift));
188 pi2_tmp[k + 8] =
189 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
190 }
191 }
192 pi2_src++;
193 pi2_tmp += trans_size;
194 zero_cols = zero_cols >> 1;
195 }
196
197 pi2_tmp = pi2_tmp_orig;
198
199 /* Inverse Transform 2nd stage */
200 shift = IT_SHIFT_STAGE_2;
201 add = 1 << (shift - 1);
202 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
203 {
204 for(j = 0; j < trans_size; j++)
205 {
206 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
207 for(k = 0; k < 8; k++)
208 {
209 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
210 + g_ai2_ihevc_trans_16[3][k]
211 * pi2_tmp[3 * trans_size];
212 }
213 for(k = 0; k < 4; k++)
214 {
215 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
216 }
217 eeo[0] = 0;
218 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
219 eeo[1] = 0;
220 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
221
222 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
223 for(k = 0; k < 2; k++)
224 {
225 ee[k] = eee[k] + eeo[k];
226 ee[k + 2] = eee[1 - k] - eeo[1 - k];
227 }
228 for(k = 0; k < 4; k++)
229 {
230 e[k] = ee[k] + eo[k];
231 e[k + 4] = ee[3 - k] - eo[3 - k];
232 }
233 for(k = 0; k < 8; k++)
234 {
235 WORD32 itrans_out;
236 itrans_out =
237 CLIP_S16(((e[k] + o[k] + add) >> shift));
238 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
239 itrans_out =
240 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
241 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
242 }
243 pi2_tmp++;
244 pu1_pred += pred_strd;
245 pu1_dst += dst_strd;
246 }
247 }
248 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
249 {
250 for(j = 0; j < trans_size; j++)
251 {
252 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
253 for(k = 0; k < 8; k++)
254 {
255 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
256 + g_ai2_ihevc_trans_16[3][k]
257 * pi2_tmp[3 * trans_size]
258 + g_ai2_ihevc_trans_16[5][k]
259 * pi2_tmp[5 * trans_size]
260 + g_ai2_ihevc_trans_16[7][k]
261 * pi2_tmp[7 * trans_size];
262 }
263 for(k = 0; k < 4; k++)
264 {
265 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
266 + g_ai2_ihevc_trans_16[6][k]
267 * pi2_tmp[6 * trans_size];
268 }
269 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
270 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
271 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
272 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
273
274 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
275 for(k = 0; k < 2; k++)
276 {
277 ee[k] = eee[k] + eeo[k];
278 ee[k + 2] = eee[1 - k] - eeo[1 - k];
279 }
280 for(k = 0; k < 4; k++)
281 {
282 e[k] = ee[k] + eo[k];
283 e[k + 4] = ee[3 - k] - eo[3 - k];
284 }
285 for(k = 0; k < 8; k++)
286 {
287 WORD32 itrans_out;
288 itrans_out =
289 CLIP_S16(((e[k] + o[k] + add) >> shift));
290 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
291 itrans_out =
292 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
293 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
294 }
295 pi2_tmp++;
296 pu1_pred += pred_strd;
297 pu1_dst += dst_strd;
298 }
299 }
300 else /* All rows of output of 1st stage are non-zero */
301 {
302 for(j = 0; j < trans_size; j++)
303 {
304 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
305 for(k = 0; k < 8; k++)
306 {
307 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
308 + g_ai2_ihevc_trans_16[3][k]
309 * pi2_tmp[3 * trans_size]
310 + g_ai2_ihevc_trans_16[5][k]
311 * pi2_tmp[5 * trans_size]
312 + g_ai2_ihevc_trans_16[7][k]
313 * pi2_tmp[7 * trans_size]
314 + g_ai2_ihevc_trans_16[9][k]
315 * pi2_tmp[9 * trans_size]
316 + g_ai2_ihevc_trans_16[11][k]
317 * pi2_tmp[11 * trans_size]
318 + g_ai2_ihevc_trans_16[13][k]
319 * pi2_tmp[13 * trans_size]
320 + g_ai2_ihevc_trans_16[15][k]
321 * pi2_tmp[15 * trans_size];
322 }
323 for(k = 0; k < 4; k++)
324 {
325 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
326 + g_ai2_ihevc_trans_16[6][k]
327 * pi2_tmp[6 * trans_size]
328 + g_ai2_ihevc_trans_16[10][k]
329 * pi2_tmp[10 * trans_size]
330 + g_ai2_ihevc_trans_16[14][k]
331 * pi2_tmp[14 * trans_size];
332 }
333 eeo[0] =
334 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
335 + g_ai2_ihevc_trans_16[12][0]
336 * pi2_tmp[12
337 * trans_size];
338 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
339 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
340 eeo[1] =
341 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
342 + g_ai2_ihevc_trans_16[12][1]
343 * pi2_tmp[12
344 * trans_size];
345 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
346 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
347
348 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
349 for(k = 0; k < 2; k++)
350 {
351 ee[k] = eee[k] + eeo[k];
352 ee[k + 2] = eee[1 - k] - eeo[1 - k];
353 }
354 for(k = 0; k < 4; k++)
355 {
356 e[k] = ee[k] + eo[k];
357 e[k + 4] = ee[3 - k] - eo[3 - k];
358 }
359 for(k = 0; k < 8; k++)
360 {
361 WORD32 itrans_out;
362 itrans_out =
363 CLIP_S16(((e[k] + o[k] + add) >> shift));
364 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
365 itrans_out =
366 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
367 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
368 }
369 pi2_tmp++;
370 pu1_pred += pred_strd;
371 pu1_dst += dst_strd;
372 }
373 }
374 /************************************************************************************************/
375 /************************************END - IT_RECON_16x16****************************************/
376 /************************************************************************************************/
377 }
378 else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */
379 {
380 /************************************************************************************************/
381 /**********************************START - IT_RECON_16x16****************************************/
382 /************************************************************************************************/
383
384 /* Inverse Transform 1st stage */
385 shift = IT_SHIFT_STAGE_1;
386 add = 1 << (shift - 1);
387
388 for(j = 0; j < row_limit_2nd_stage; j++)
389 {
390 /* Checking for Zero Cols */
391 if((zero_cols & 1) == 1)
392 {
393 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
394 }
395 else
396 {
397 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
398 for(k = 0; k < 8; k++)
399 {
400 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
401 + g_ai2_ihevc_trans_16[3][k]
402 * pi2_src[3 * src_strd]
403 + g_ai2_ihevc_trans_16[5][k]
404 * pi2_src[5 * src_strd]
405 + g_ai2_ihevc_trans_16[7][k]
406 * pi2_src[7 * src_strd];
407 }
408 for(k = 0; k < 4; k++)
409 {
410 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
411 + g_ai2_ihevc_trans_16[6][k]
412 * pi2_src[6 * src_strd];
413 }
414 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd];
415 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
416 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd];
417 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
418
419 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
420 for(k = 0; k < 2; k++)
421 {
422 ee[k] = eee[k] + eeo[k];
423 ee[k + 2] = eee[1 - k] - eeo[1 - k];
424 }
425 for(k = 0; k < 4; k++)
426 {
427 e[k] = ee[k] + eo[k];
428 e[k + 4] = ee[3 - k] - eo[3 - k];
429 }
430 for(k = 0; k < 8; k++)
431 {
432 pi2_tmp[k] =
433 CLIP_S16(((e[k] + o[k] + add) >> shift));
434 pi2_tmp[k + 8] =
435 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
436 }
437 }
438 pi2_src++;
439 pi2_tmp += trans_size;
440 zero_cols = zero_cols >> 1;
441 }
442
443 pi2_tmp = pi2_tmp_orig;
444
445 /* Inverse Transform 2nd stage */
446 shift = IT_SHIFT_STAGE_2;
447 add = 1 << (shift - 1);
448 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
449 {
450 for(j = 0; j < trans_size; j++)
451 {
452 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
453 for(k = 0; k < 8; k++)
454 {
455 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
456 + g_ai2_ihevc_trans_16[3][k]
457 * pi2_tmp[3 * trans_size];
458 }
459 for(k = 0; k < 4; k++)
460 {
461 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
462 }
463 eeo[0] = 0;
464 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
465 eeo[1] = 0;
466 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
467
468 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
469 for(k = 0; k < 2; k++)
470 {
471 ee[k] = eee[k] + eeo[k];
472 ee[k + 2] = eee[1 - k] - eeo[1 - k];
473 }
474 for(k = 0; k < 4; k++)
475 {
476 e[k] = ee[k] + eo[k];
477 e[k + 4] = ee[3 - k] - eo[3 - k];
478 }
479 for(k = 0; k < 8; k++)
480 {
481 WORD32 itrans_out;
482 itrans_out =
483 CLIP_S16(((e[k] + o[k] + add) >> shift));
484 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
485 itrans_out =
486 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
487 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
488 }
489 pi2_tmp++;
490 pu1_pred += pred_strd;
491 pu1_dst += dst_strd;
492 }
493 }
494 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
495 {
496 for(j = 0; j < trans_size; j++)
497 {
498 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
499 for(k = 0; k < 8; k++)
500 {
501 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
502 + g_ai2_ihevc_trans_16[3][k]
503 * pi2_tmp[3 * trans_size]
504 + g_ai2_ihevc_trans_16[5][k]
505 * pi2_tmp[5 * trans_size]
506 + g_ai2_ihevc_trans_16[7][k]
507 * pi2_tmp[7 * trans_size];
508 }
509 for(k = 0; k < 4; k++)
510 {
511 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
512 + g_ai2_ihevc_trans_16[6][k]
513 * pi2_tmp[6 * trans_size];
514 }
515 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
516 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
517 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
518 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
519
520 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
521 for(k = 0; k < 2; k++)
522 {
523 ee[k] = eee[k] + eeo[k];
524 ee[k + 2] = eee[1 - k] - eeo[1 - k];
525 }
526 for(k = 0; k < 4; k++)
527 {
528 e[k] = ee[k] + eo[k];
529 e[k + 4] = ee[3 - k] - eo[3 - k];
530 }
531 for(k = 0; k < 8; k++)
532 {
533 WORD32 itrans_out;
534 itrans_out =
535 CLIP_S16(((e[k] + o[k] + add) >> shift));
536 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
537 itrans_out =
538 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
539 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
540 }
541 pi2_tmp++;
542 pu1_pred += pred_strd;
543 pu1_dst += dst_strd;
544 }
545 }
546 else /* All rows of output of 1st stage are non-zero */
547 {
548 for(j = 0; j < trans_size; j++)
549 {
550 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
551 for(k = 0; k < 8; k++)
552 {
553 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
554 + g_ai2_ihevc_trans_16[3][k]
555 * pi2_tmp[3 * trans_size]
556 + g_ai2_ihevc_trans_16[5][k]
557 * pi2_tmp[5 * trans_size]
558 + g_ai2_ihevc_trans_16[7][k]
559 * pi2_tmp[7 * trans_size]
560 + g_ai2_ihevc_trans_16[9][k]
561 * pi2_tmp[9 * trans_size]
562 + g_ai2_ihevc_trans_16[11][k]
563 * pi2_tmp[11 * trans_size]
564 + g_ai2_ihevc_trans_16[13][k]
565 * pi2_tmp[13 * trans_size]
566 + g_ai2_ihevc_trans_16[15][k]
567 * pi2_tmp[15 * trans_size];
568 }
569 for(k = 0; k < 4; k++)
570 {
571 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
572 + g_ai2_ihevc_trans_16[6][k]
573 * pi2_tmp[6 * trans_size]
574 + g_ai2_ihevc_trans_16[10][k]
575 * pi2_tmp[10 * trans_size]
576 + g_ai2_ihevc_trans_16[14][k]
577 * pi2_tmp[14 * trans_size];
578 }
579 eeo[0] =
580 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
581 + g_ai2_ihevc_trans_16[12][0]
582 * pi2_tmp[12
583 * trans_size];
584 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
585 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
586 eeo[1] =
587 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
588 + g_ai2_ihevc_trans_16[12][1]
589 * pi2_tmp[12
590 * trans_size];
591 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
592 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
593
594 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
595 for(k = 0; k < 2; k++)
596 {
597 ee[k] = eee[k] + eeo[k];
598 ee[k + 2] = eee[1 - k] - eeo[1 - k];
599 }
600 for(k = 0; k < 4; k++)
601 {
602 e[k] = ee[k] + eo[k];
603 e[k + 4] = ee[3 - k] - eo[3 - k];
604 }
605 for(k = 0; k < 8; k++)
606 {
607 WORD32 itrans_out;
608 itrans_out =
609 CLIP_S16(((e[k] + o[k] + add) >> shift));
610 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
611 itrans_out =
612 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
613 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
614 }
615 pi2_tmp++;
616 pu1_pred += pred_strd;
617 pu1_dst += dst_strd;
618 }
619 }
620 /************************************************************************************************/
621 /************************************END - IT_RECON_16x16****************************************/
622 /************************************************************************************************/
623 }
624 else /* All rows of input are non-zero */
625 {
626 /************************************************************************************************/
627 /**********************************START - IT_RECON_16x16****************************************/
628 /************************************************************************************************/
629
630 /* Inverse Transform 1st stage */
631 shift = IT_SHIFT_STAGE_1;
632 add = 1 << (shift - 1);
633
634 for(j = 0; j < row_limit_2nd_stage; j++)
635 {
636 /* Checking for Zero Cols */
637 if((zero_cols & 1) == 1)
638 {
639 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
640 }
641 else
642 {
643 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
644 for(k = 0; k < 8; k++)
645 {
646 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
647 + g_ai2_ihevc_trans_16[3][k]
648 * pi2_src[3 * src_strd]
649 + g_ai2_ihevc_trans_16[5][k]
650 * pi2_src[5 * src_strd]
651 + g_ai2_ihevc_trans_16[7][k]
652 * pi2_src[7 * src_strd]
653 + g_ai2_ihevc_trans_16[9][k]
654 * pi2_src[9 * src_strd]
655 + g_ai2_ihevc_trans_16[11][k]
656 * pi2_src[11 * src_strd]
657 + g_ai2_ihevc_trans_16[13][k]
658 * pi2_src[13 * src_strd]
659 + g_ai2_ihevc_trans_16[15][k]
660 * pi2_src[15 * src_strd];
661 }
662 for(k = 0; k < 4; k++)
663 {
664 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
665 + g_ai2_ihevc_trans_16[6][k]
666 * pi2_src[6 * src_strd]
667 + g_ai2_ihevc_trans_16[10][k]
668 * pi2_src[10 * src_strd]
669 + g_ai2_ihevc_trans_16[14][k]
670 * pi2_src[14 * src_strd];
671 }
672 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]
673 + g_ai2_ihevc_trans_16[12][0]
674 * pi2_src[12 * src_strd];
675 eee[0] =
676 g_ai2_ihevc_trans_16[0][0] * pi2_src[0]
677 + g_ai2_ihevc_trans_16[8][0]
678 * pi2_src[8
679 * src_strd];
680 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]
681 + g_ai2_ihevc_trans_16[12][1]
682 * pi2_src[12 * src_strd];
683 eee[1] =
684 g_ai2_ihevc_trans_16[0][1] * pi2_src[0]
685 + g_ai2_ihevc_trans_16[8][1]
686 * pi2_src[8
687 * src_strd];
688
689 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
690 for(k = 0; k < 2; k++)
691 {
692 ee[k] = eee[k] + eeo[k];
693 ee[k + 2] = eee[1 - k] - eeo[1 - k];
694 }
695 for(k = 0; k < 4; k++)
696 {
697 e[k] = ee[k] + eo[k];
698 e[k + 4] = ee[3 - k] - eo[3 - k];
699 }
700 for(k = 0; k < 8; k++)
701 {
702 pi2_tmp[k] =
703 CLIP_S16(((e[k] + o[k] + add) >> shift));
704 pi2_tmp[k + 8] =
705 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
706 }
707 }
708 pi2_src++;
709 pi2_tmp += trans_size;
710 zero_cols = zero_cols >> 1;
711 }
712
713 pi2_tmp = pi2_tmp_orig;
714
715 /* Inverse Transform 2nd stage */
716 shift = IT_SHIFT_STAGE_2;
717 add = 1 << (shift - 1);
718 if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
719 {
720 for(j = 0; j < trans_size; j++)
721 {
722 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
723 for(k = 0; k < 8; k++)
724 {
725 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
726 + g_ai2_ihevc_trans_16[3][k]
727 * pi2_tmp[3 * trans_size];
728 }
729 for(k = 0; k < 4; k++)
730 {
731 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
732 }
733 eeo[0] = 0;
734 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
735 eeo[1] = 0;
736 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
737
738 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
739 for(k = 0; k < 2; k++)
740 {
741 ee[k] = eee[k] + eeo[k];
742 ee[k + 2] = eee[1 - k] - eeo[1 - k];
743 }
744 for(k = 0; k < 4; k++)
745 {
746 e[k] = ee[k] + eo[k];
747 e[k + 4] = ee[3 - k] - eo[3 - k];
748 }
749 for(k = 0; k < 8; k++)
750 {
751 WORD32 itrans_out;
752 itrans_out =
753 CLIP_S16(((e[k] + o[k] + add) >> shift));
754 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
755 itrans_out =
756 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
757 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
758 }
759 pi2_tmp++;
760 pu1_pred += pred_strd;
761 pu1_dst += dst_strd;
762 }
763 }
764 else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
765 {
766 for(j = 0; j < trans_size; j++)
767 {
768 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
769 for(k = 0; k < 8; k++)
770 {
771 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
772 + g_ai2_ihevc_trans_16[3][k]
773 * pi2_tmp[3 * trans_size]
774 + g_ai2_ihevc_trans_16[5][k]
775 * pi2_tmp[5 * trans_size]
776 + g_ai2_ihevc_trans_16[7][k]
777 * pi2_tmp[7 * trans_size];
778 }
779 for(k = 0; k < 4; k++)
780 {
781 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
782 + g_ai2_ihevc_trans_16[6][k]
783 * pi2_tmp[6 * trans_size];
784 }
785 eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
786 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
787 eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
788 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
789
790 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
791 for(k = 0; k < 2; k++)
792 {
793 ee[k] = eee[k] + eeo[k];
794 ee[k + 2] = eee[1 - k] - eeo[1 - k];
795 }
796 for(k = 0; k < 4; k++)
797 {
798 e[k] = ee[k] + eo[k];
799 e[k + 4] = ee[3 - k] - eo[3 - k];
800 }
801 for(k = 0; k < 8; k++)
802 {
803 WORD32 itrans_out;
804 itrans_out =
805 CLIP_S16(((e[k] + o[k] + add) >> shift));
806 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
807 itrans_out =
808 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
809 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
810 }
811 pi2_tmp++;
812 pu1_pred += pred_strd;
813 pu1_dst += dst_strd;
814 }
815 }
816 else /* All rows of output of 1st stage are non-zero */
817 {
818 for(j = 0; j < trans_size; j++)
819 {
820 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
821 for(k = 0; k < 8; k++)
822 {
823 o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
824 + g_ai2_ihevc_trans_16[3][k]
825 * pi2_tmp[3 * trans_size]
826 + g_ai2_ihevc_trans_16[5][k]
827 * pi2_tmp[5 * trans_size]
828 + g_ai2_ihevc_trans_16[7][k]
829 * pi2_tmp[7 * trans_size]
830 + g_ai2_ihevc_trans_16[9][k]
831 * pi2_tmp[9 * trans_size]
832 + g_ai2_ihevc_trans_16[11][k]
833 * pi2_tmp[11 * trans_size]
834 + g_ai2_ihevc_trans_16[13][k]
835 * pi2_tmp[13 * trans_size]
836 + g_ai2_ihevc_trans_16[15][k]
837 * pi2_tmp[15 * trans_size];
838 }
839 for(k = 0; k < 4; k++)
840 {
841 eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
842 + g_ai2_ihevc_trans_16[6][k]
843 * pi2_tmp[6 * trans_size]
844 + g_ai2_ihevc_trans_16[10][k]
845 * pi2_tmp[10 * trans_size]
846 + g_ai2_ihevc_trans_16[14][k]
847 * pi2_tmp[14 * trans_size];
848 }
849 eeo[0] =
850 g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
851 + g_ai2_ihevc_trans_16[12][0]
852 * pi2_tmp[12
853 * trans_size];
854 eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
855 + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
856 eeo[1] =
857 g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
858 + g_ai2_ihevc_trans_16[12][1]
859 * pi2_tmp[12
860 * trans_size];
861 eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
862 + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
863
864 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
865 for(k = 0; k < 2; k++)
866 {
867 ee[k] = eee[k] + eeo[k];
868 ee[k + 2] = eee[1 - k] - eeo[1 - k];
869 }
870 for(k = 0; k < 4; k++)
871 {
872 e[k] = ee[k] + eo[k];
873 e[k + 4] = ee[3 - k] - eo[3 - k];
874 }
875 for(k = 0; k < 8; k++)
876 {
877 WORD32 itrans_out;
878 itrans_out =
879 CLIP_S16(((e[k] + o[k] + add) >> shift));
880 pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
881 itrans_out =
882 CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
883 pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2]));
884 }
885 pi2_tmp++;
886 pu1_pred += pred_strd;
887 pu1_dst += dst_strd;
888 }
889 }
890 /************************************************************************************************/
891 /************************************END - IT_RECON_16x16****************************************/
892 /************************************************************************************************/
893 }
894 }
895
896