• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_itrans_recon_32x32.c
22  *
23  * @brief
24  *  Contains function definitions for inverse transform  and reconstruction 32x32
25  *
26  *
27  * @author
28  *  100470
29  *
30  * @par List of Functions:
31  *  - ihevc_itrans_recon_32x32()
32  *
33  * @remarks
34  *  None
35  *
36  *******************************************************************************
37  */
38 #include <stdio.h>
39 #include <string.h>
40 #include "ihevc_typedefs.h"
41 #include "ihevc_macros.h"
42 #include "ihevc_platform_macros.h"
43 #include "ihevc_defs.h"
44 #include "ihevc_trans_tables.h"
45 #include "ihevc_itrans_recon.h"
46 #include "ihevc_func_selector.h"
47 #include "ihevc_trans_macros.h"
48 
49 
50 /**
51  *******************************************************************************
52  *
53  * @brief
54  *  This function performs the inverse transform and reconstruction for a
55  * 32x32 input block
56  *
57  * @par Description:
58  *  Performs the inverse transform, adds the prediction data and clips the
59  * output to 8 bits
60  *
61  * @param[in] pi2_src
62  *  Input 32x32 coefficients
63  *
64  * @param[in] pi2_tmp
65  *  Temporary 32x32 buffer for storing the
66  *  inverse transform 1st stage output.
67  *  It is written by the 1st stage and
68  *  read back by the 2nd stage.
69  *
70  * @param[in] pu1_pred
71  *  Prediction 32x32 block
72  *
73  * @param[out] pu1_dst
74  *  Output 32x32 block
75  *
76  * @param[in] src_strd
77  *  Input stride
78  *
79  * @param[in] pred_strd
80  *  Prediction stride
81  *
82  * @param[in] dst_strd
83  *  Output Stride
84  *
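85  * @param[in] zero_cols
86  *  Bitmask of zero columns in pi2_src (bit n set: column n is treated as all zero)
87  *
88  * @param[in] zero_rows
89  *  Bitmask of zero rows in pi2_src (bit n set: row n is treated as all zero)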
90  *
91  * @returns  Void
92  *
93  * @remarks
94  *  None
95  *
96  *******************************************************************************
97  */
98 
ihevc_itrans_recon_32x32(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)99 void ihevc_itrans_recon_32x32(WORD16 *pi2_src,
100                               WORD16 *pi2_tmp,
101                               UWORD8 *pu1_pred,
102                               UWORD8 *pu1_dst,
103                               WORD32 src_strd,
104                               WORD32 pred_strd,
105                               WORD32 dst_strd,
106                               WORD32 zero_cols,
107                               WORD32 zero_rows)
108 {
109     WORD32 j, k;
110     WORD32 e[16], o[16];
111     WORD32 ee[8], eo[8];
112     WORD32 eee[4], eeo[4];
113     WORD32 eeee[2], eeeo[2];
114     WORD32 add;
115     WORD32 shift;
116     WORD16 *pi2_tmp_orig;
117     WORD32 trans_size;
118     WORD32 zero_rows_2nd_stage = zero_cols;
119     WORD32 row_limit_2nd_stage;
120 
121     trans_size = TRANS_SIZE_32;
122     pi2_tmp_orig = pi2_tmp;
123 
124     if((zero_cols & 0xFFFFFFF0) == 0xFFFFFFF0)
125         row_limit_2nd_stage = 4;
126     else if((zero_cols & 0xFFFFFF00) == 0xFFFFFF00)
127         row_limit_2nd_stage = 8;
128     else
129         row_limit_2nd_stage = TRANS_SIZE_32;
130 
131     if((zero_rows & 0xFFFFFFF0) == 0xFFFFFFF0)  /* First 4 rows of input are non-zero */
132     {
133         /************************************************************************************************/
134         /**********************************START - IT_RECON_32x32****************************************/
135         /************************************************************************************************/
136         /* Inverse Transform 1st stage */
137         shift = IT_SHIFT_STAGE_1;
138         add = 1 << (shift - 1);
139 
140         for(j = 0; j < row_limit_2nd_stage; j++)
141         {
142             /* Checking for Zero Cols */
143             if((zero_cols & 1) == 1)
144             {
145                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
146             }
147             else
148             {
149                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
150                 for(k = 0; k < 16; k++)
151                 {
152                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
153                                     + g_ai2_ihevc_trans_32[3][k]
154                                                     * pi2_src[3 * src_strd];
155                 }
156                 for(k = 0; k < 8; k++)
157                 {
158                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd];
159                 }
160 //                for(k = 0; k < 4; k++)
161                 {
162                     eeo[0] = 0;
163                     eeo[1] = 0;
164                     eeo[2] = 0;
165                     eeo[3] = 0;
166                 }
167                 eeeo[0] = 0;
168                 eeeo[1] = 0;
169                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
170                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
171 
172                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
173                 eee[0] = eeee[0] + eeeo[0];
174                 eee[3] = eeee[0] - eeeo[0];
175                 eee[1] = eeee[1] + eeeo[1];
176                 eee[2] = eeee[1] - eeeo[1];
177                 for(k = 0; k < 4; k++)
178                 {
179                     ee[k] = eee[k] + eeo[k];
180                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
181                 }
182                 for(k = 0; k < 8; k++)
183                 {
184                     e[k] = ee[k] + eo[k];
185                     e[k + 8] = ee[7 - k] - eo[7 - k];
186                 }
187                 for(k = 0; k < 16; k++)
188                 {
189                     pi2_tmp[k] =
190                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
191                     pi2_tmp[k + 16] =
192                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
193                 }
194             }
195             pi2_src++;
196             pi2_tmp += trans_size;
197             zero_cols = zero_cols >> 1;
198         }
199 
200         pi2_tmp = pi2_tmp_orig;
201 
202         /* Inverse Transform 2nd stage */
203         shift = IT_SHIFT_STAGE_2;
204         add = 1 << (shift - 1);
205         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
206         {
207             for(j = 0; j < trans_size; j++)
208             {
209                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
210                 for(k = 0; k < 16; k++)
211                 {
212                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
213                                     + g_ai2_ihevc_trans_32[3][k]
214                                                     * pi2_tmp[3 * trans_size];
215                 }
216                 for(k = 0; k < 8; k++)
217                 {
218                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
219                 }
220 //                for(k = 0; k < 4; k++)
221                 {
222                     eeo[0] = 0;
223                     eeo[1] = 0;
224                     eeo[2] = 0;
225                     eeo[3] = 0;
226                 }
227                 eeeo[0] = 0;
228                 eeeo[1] = 0;
229                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
230                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
231 
232                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
233                 eee[0] = eeee[0] + eeeo[0];
234                 eee[3] = eeee[0] - eeeo[0];
235                 eee[1] = eeee[1] + eeeo[1];
236                 eee[2] = eeee[1] - eeeo[1];
237                 for(k = 0; k < 4; k++)
238                 {
239                     ee[k] = eee[k] + eeo[k];
240                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
241                 }
242                 for(k = 0; k < 8; k++)
243                 {
244                     e[k] = ee[k] + eo[k];
245                     e[k + 8] = ee[7 - k] - eo[7 - k];
246                 }
247                 for(k = 0; k < 16; k++)
248                 {
249                     WORD32 itrans_out;
250                     itrans_out =
251                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
252                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253                     itrans_out =
254                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
255                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
256                 }
257                 pi2_tmp++;
258                 pu1_pred += pred_strd;
259                 pu1_dst += dst_strd;
260             }
261         }
262         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
263         {
264             for(j = 0; j < trans_size; j++)
265             {
266                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
267                 for(k = 0; k < 16; k++)
268                 {
269                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
270                                     + g_ai2_ihevc_trans_32[3][k]
271                                                     * pi2_tmp[3 * trans_size]
272                                     + g_ai2_ihevc_trans_32[5][k]
273                                                     * pi2_tmp[5 * trans_size]
274                                     + g_ai2_ihevc_trans_32[7][k]
275                                                     * pi2_tmp[7 * trans_size];
276                 }
277                 for(k = 0; k < 8; k++)
278                 {
279                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
280                                     + g_ai2_ihevc_trans_32[6][k]
281                                                     * pi2_tmp[6 * trans_size];
282                 }
283                 for(k = 0; k < 4; k++)
284                 {
285                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
286                 }
287                 eeeo[0] = 0;
288                 eeeo[1] = 0;
289                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
290                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
291 
292                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
293                 eee[0] = eeee[0] + eeeo[0];
294                 eee[3] = eeee[0] - eeeo[0];
295                 eee[1] = eeee[1] + eeeo[1];
296                 eee[2] = eeee[1] - eeeo[1];
297                 for(k = 0; k < 4; k++)
298                 {
299                     ee[k] = eee[k] + eeo[k];
300                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
301                 }
302                 for(k = 0; k < 8; k++)
303                 {
304                     e[k] = ee[k] + eo[k];
305                     e[k + 8] = ee[7 - k] - eo[7 - k];
306                 }
307                 for(k = 0; k < 16; k++)
308                 {
309                     WORD32 itrans_out;
310                     itrans_out =
311                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
312                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
313                     itrans_out =
314                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
315                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
316                 }
317                 pi2_tmp++;
318                 pu1_pred += pred_strd;
319                 pu1_dst += dst_strd;
320             }
321         }
322         else /* All rows of output of 1st stage are non-zero */
323         {
324             for(j = 0; j < trans_size; j++)
325             {
326                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
327                 for(k = 0; k < 16; k++)
328                 {
329                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
330                                     + g_ai2_ihevc_trans_32[3][k]
331                                                     * pi2_tmp[3 * trans_size]
332                                     + g_ai2_ihevc_trans_32[5][k]
333                                                     * pi2_tmp[5 * trans_size]
334                                     + g_ai2_ihevc_trans_32[7][k]
335                                                     * pi2_tmp[7 * trans_size]
336                                     + g_ai2_ihevc_trans_32[9][k]
337                                                     * pi2_tmp[9 * trans_size]
338                                     + g_ai2_ihevc_trans_32[11][k]
339                                                     * pi2_tmp[11 * trans_size]
340                                     + g_ai2_ihevc_trans_32[13][k]
341                                                     * pi2_tmp[13 * trans_size]
342                                     + g_ai2_ihevc_trans_32[15][k]
343                                                     * pi2_tmp[15 * trans_size]
344                                     + g_ai2_ihevc_trans_32[17][k]
345                                                     * pi2_tmp[17 * trans_size]
346                                     + g_ai2_ihevc_trans_32[19][k]
347                                                     * pi2_tmp[19 * trans_size]
348                                     + g_ai2_ihevc_trans_32[21][k]
349                                                     * pi2_tmp[21 * trans_size]
350                                     + g_ai2_ihevc_trans_32[23][k]
351                                                     * pi2_tmp[23 * trans_size]
352                                     + g_ai2_ihevc_trans_32[25][k]
353                                                     * pi2_tmp[25 * trans_size]
354                                     + g_ai2_ihevc_trans_32[27][k]
355                                                     * pi2_tmp[27 * trans_size]
356                                     + g_ai2_ihevc_trans_32[29][k]
357                                                     * pi2_tmp[29 * trans_size]
358                                     + g_ai2_ihevc_trans_32[31][k]
359                                                     * pi2_tmp[31 * trans_size];
360                 }
361                 for(k = 0; k < 8; k++)
362                 {
363                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
364                                     + g_ai2_ihevc_trans_32[6][k]
365                                                     * pi2_tmp[6 * trans_size]
366                                     + g_ai2_ihevc_trans_32[10][k]
367                                                     * pi2_tmp[10 * trans_size]
368                                     + g_ai2_ihevc_trans_32[14][k]
369                                                     * pi2_tmp[14 * trans_size]
370                                     + g_ai2_ihevc_trans_32[18][k]
371                                                     * pi2_tmp[18 * trans_size]
372                                     + g_ai2_ihevc_trans_32[22][k]
373                                                     * pi2_tmp[22 * trans_size]
374                                     + g_ai2_ihevc_trans_32[26][k]
375                                                     * pi2_tmp[26 * trans_size]
376                                     + g_ai2_ihevc_trans_32[30][k]
377                                                     * pi2_tmp[30 * trans_size];
378                 }
379                 for(k = 0; k < 4; k++)
380                 {
381                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
382                                     + g_ai2_ihevc_trans_32[12][k]
383                                                     * pi2_tmp[12 * trans_size]
384                                     + g_ai2_ihevc_trans_32[20][k]
385                                                     * pi2_tmp[20 * trans_size]
386                                     + g_ai2_ihevc_trans_32[28][k]
387                                                     * pi2_tmp[28 * trans_size];
388                 }
389                 eeeo[0] =
390                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
391                                                 + g_ai2_ihevc_trans_32[24][0]
392                                                                 * pi2_tmp[24
393                                                                                 * trans_size];
394                 eeeo[1] =
395                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
396                                                 + g_ai2_ihevc_trans_32[24][1]
397                                                                 * pi2_tmp[24
398                                                                                 * trans_size];
399                 eeee[0] =
400                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
401                                                 + g_ai2_ihevc_trans_32[16][0]
402                                                                 * pi2_tmp[16
403                                                                                 * trans_size];
404                 eeee[1] =
405                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
406                                                 + g_ai2_ihevc_trans_32[16][1]
407                                                                 * pi2_tmp[16
408                                                                                 * trans_size];
409 
410                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
411                 eee[0] = eeee[0] + eeeo[0];
412                 eee[3] = eeee[0] - eeeo[0];
413                 eee[1] = eeee[1] + eeeo[1];
414                 eee[2] = eeee[1] - eeeo[1];
415                 for(k = 0; k < 4; k++)
416                 {
417                     ee[k] = eee[k] + eeo[k];
418                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
419                 }
420                 for(k = 0; k < 8; k++)
421                 {
422                     e[k] = ee[k] + eo[k];
423                     e[k + 8] = ee[7 - k] - eo[7 - k];
424                 }
425                 for(k = 0; k < 16; k++)
426                 {
427                     WORD32 itrans_out;
428                     itrans_out =
429                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
430                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
431                     itrans_out =
432                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
433                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
434                 }
435                 pi2_tmp++;
436                 pu1_pred += pred_strd;
437                 pu1_dst += dst_strd;
438             }
439         }
440         /************************************************************************************************/
441         /************************************END - IT_RECON_32x32****************************************/
442         /************************************************************************************************/
443     }
444     else if((zero_rows & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of input are non-zero */
445     {
446         /************************************************************************************************/
447         /**********************************START - IT_RECON_32x32****************************************/
448         /************************************************************************************************/
449         /* Inverse Transform 1st stage */
450         shift = IT_SHIFT_STAGE_1;
451         add = 1 << (shift - 1);
452 
453         for(j = 0; j < row_limit_2nd_stage; j++)
454         {
455             /* Checking for Zero Cols */
456             if((zero_cols & 1) == 1)
457             {
458                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
459             }
460             else
461             {
462                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
463                 for(k = 0; k < 16; k++)
464                 {
465                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
466                                     + g_ai2_ihevc_trans_32[3][k]
467                                                     * pi2_src[3 * src_strd]
468                                     + g_ai2_ihevc_trans_32[5][k]
469                                                     * pi2_src[5 * src_strd]
470                                     + g_ai2_ihevc_trans_32[7][k]
471                                                     * pi2_src[7 * src_strd];
472                 }
473                 for(k = 0; k < 8; k++)
474                 {
475                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
476                                     + g_ai2_ihevc_trans_32[6][k]
477                                                     * pi2_src[6 * src_strd];
478                 }
479                 for(k = 0; k < 4; k++)
480                 {
481                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd];
482                 }
483                 eeeo[0] = 0;
484                 eeeo[1] = 0;
485                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
486                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
487 
488                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
489                 eee[0] = eeee[0] + eeeo[0];
490                 eee[3] = eeee[0] - eeeo[0];
491                 eee[1] = eeee[1] + eeeo[1];
492                 eee[2] = eeee[1] - eeeo[1];
493                 for(k = 0; k < 4; k++)
494                 {
495                     ee[k] = eee[k] + eeo[k];
496                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
497                 }
498                 for(k = 0; k < 8; k++)
499                 {
500                     e[k] = ee[k] + eo[k];
501                     e[k + 8] = ee[7 - k] - eo[7 - k];
502                 }
503                 for(k = 0; k < 16; k++)
504                 {
505                     pi2_tmp[k] =
506                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
507                     pi2_tmp[k + 16] =
508                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
509                 }
510             }
511             pi2_src++;
512             pi2_tmp += trans_size;
513             zero_cols = zero_cols >> 1;
514         }
515 
516         pi2_tmp = pi2_tmp_orig;
517 
518         /* Inverse Transform 2nd stage */
519         shift = IT_SHIFT_STAGE_2;
520         add = 1 << (shift - 1);
521         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
522         {
523             for(j = 0; j < trans_size; j++)
524             {
525                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
526                 for(k = 0; k < 16; k++)
527                 {
528                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
529                                     + g_ai2_ihevc_trans_32[3][k]
530                                                     * pi2_tmp[3 * trans_size];
531                 }
532                 for(k = 0; k < 8; k++)
533                 {
534                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
535                 }
536 //                for(k = 0; k < 4; k++)
537                 {
538                     eeo[0] = 0;
539                     eeo[1] = 0;
540                     eeo[2] = 0;
541                     eeo[3] = 0;
542                 }
543                 eeeo[0] = 0;
544                 eeeo[1] = 0;
545                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
546                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
547 
548                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
549                 eee[0] = eeee[0] + eeeo[0];
550                 eee[3] = eeee[0] - eeeo[0];
551                 eee[1] = eeee[1] + eeeo[1];
552                 eee[2] = eeee[1] - eeeo[1];
553                 for(k = 0; k < 4; k++)
554                 {
555                     ee[k] = eee[k] + eeo[k];
556                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
557                 }
558                 for(k = 0; k < 8; k++)
559                 {
560                     e[k] = ee[k] + eo[k];
561                     e[k + 8] = ee[7 - k] - eo[7 - k];
562                 }
563                 for(k = 0; k < 16; k++)
564                 {
565                     WORD32 itrans_out;
566                     itrans_out =
567                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
568                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
569                     itrans_out =
570                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
571                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
572                 }
573                 pi2_tmp++;
574                 pu1_pred += pred_strd;
575                 pu1_dst += dst_strd;
576             }
577         }
578         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
579         {
580             for(j = 0; j < trans_size; j++)
581             {
582                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
583                 for(k = 0; k < 16; k++)
584                 {
585                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
586                                     + g_ai2_ihevc_trans_32[3][k]
587                                                     * pi2_tmp[3 * trans_size]
588                                     + g_ai2_ihevc_trans_32[5][k]
589                                                     * pi2_tmp[5 * trans_size]
590                                     + g_ai2_ihevc_trans_32[7][k]
591                                                     * pi2_tmp[7 * trans_size];
592                 }
593                 for(k = 0; k < 8; k++)
594                 {
595                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
596                                     + g_ai2_ihevc_trans_32[6][k]
597                                                     * pi2_tmp[6 * trans_size];
598                 }
599                 for(k = 0; k < 4; k++)
600                 {
601                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
602                 }
603                 eeeo[0] = 0;
604                 eeeo[1] = 0;
605                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
606                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
607 
608                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
609                 eee[0] = eeee[0] + eeeo[0];
610                 eee[3] = eeee[0] - eeeo[0];
611                 eee[1] = eeee[1] + eeeo[1];
612                 eee[2] = eeee[1] - eeeo[1];
613                 for(k = 0; k < 4; k++)
614                 {
615                     ee[k] = eee[k] + eeo[k];
616                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
617                 }
618                 for(k = 0; k < 8; k++)
619                 {
620                     e[k] = ee[k] + eo[k];
621                     e[k + 8] = ee[7 - k] - eo[7 - k];
622                 }
623                 for(k = 0; k < 16; k++)
624                 {
625                     WORD32 itrans_out;
626                     itrans_out =
627                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
628                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
629                     itrans_out =
630                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
631                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
632                 }
633                 pi2_tmp++;
634                 pu1_pred += pred_strd;
635                 pu1_dst += dst_strd;
636             }
637         }
638         else /* All rows of output of 1st stage are non-zero */
639         {
640             for(j = 0; j < trans_size; j++)
641             {
642                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
643                 for(k = 0; k < 16; k++)
644                 {
645                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
646                                     + g_ai2_ihevc_trans_32[3][k]
647                                                     * pi2_tmp[3 * trans_size]
648                                     + g_ai2_ihevc_trans_32[5][k]
649                                                     * pi2_tmp[5 * trans_size]
650                                     + g_ai2_ihevc_trans_32[7][k]
651                                                     * pi2_tmp[7 * trans_size]
652                                     + g_ai2_ihevc_trans_32[9][k]
653                                                     * pi2_tmp[9 * trans_size]
654                                     + g_ai2_ihevc_trans_32[11][k]
655                                                     * pi2_tmp[11 * trans_size]
656                                     + g_ai2_ihevc_trans_32[13][k]
657                                                     * pi2_tmp[13 * trans_size]
658                                     + g_ai2_ihevc_trans_32[15][k]
659                                                     * pi2_tmp[15 * trans_size]
660                                     + g_ai2_ihevc_trans_32[17][k]
661                                                     * pi2_tmp[17 * trans_size]
662                                     + g_ai2_ihevc_trans_32[19][k]
663                                                     * pi2_tmp[19 * trans_size]
664                                     + g_ai2_ihevc_trans_32[21][k]
665                                                     * pi2_tmp[21 * trans_size]
666                                     + g_ai2_ihevc_trans_32[23][k]
667                                                     * pi2_tmp[23 * trans_size]
668                                     + g_ai2_ihevc_trans_32[25][k]
669                                                     * pi2_tmp[25 * trans_size]
670                                     + g_ai2_ihevc_trans_32[27][k]
671                                                     * pi2_tmp[27 * trans_size]
672                                     + g_ai2_ihevc_trans_32[29][k]
673                                                     * pi2_tmp[29 * trans_size]
674                                     + g_ai2_ihevc_trans_32[31][k]
675                                                     * pi2_tmp[31 * trans_size];
676                 }
677                 for(k = 0; k < 8; k++)
678                 {
679                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
680                                     + g_ai2_ihevc_trans_32[6][k]
681                                                     * pi2_tmp[6 * trans_size]
682                                     + g_ai2_ihevc_trans_32[10][k]
683                                                     * pi2_tmp[10 * trans_size]
684                                     + g_ai2_ihevc_trans_32[14][k]
685                                                     * pi2_tmp[14 * trans_size]
686                                     + g_ai2_ihevc_trans_32[18][k]
687                                                     * pi2_tmp[18 * trans_size]
688                                     + g_ai2_ihevc_trans_32[22][k]
689                                                     * pi2_tmp[22 * trans_size]
690                                     + g_ai2_ihevc_trans_32[26][k]
691                                                     * pi2_tmp[26 * trans_size]
692                                     + g_ai2_ihevc_trans_32[30][k]
693                                                     * pi2_tmp[30 * trans_size];
694                 }
695                 for(k = 0; k < 4; k++)
696                 {
697                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
698                                     + g_ai2_ihevc_trans_32[12][k]
699                                                     * pi2_tmp[12 * trans_size]
700                                     + g_ai2_ihevc_trans_32[20][k]
701                                                     * pi2_tmp[20 * trans_size]
702                                     + g_ai2_ihevc_trans_32[28][k]
703                                                     * pi2_tmp[28 * trans_size];
704                 }
705                 eeeo[0] =
706                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
707                                                 + g_ai2_ihevc_trans_32[24][0]
708                                                                 * pi2_tmp[24
709                                                                                 * trans_size];
710                 eeeo[1] =
711                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
712                                                 + g_ai2_ihevc_trans_32[24][1]
713                                                                 * pi2_tmp[24
714                                                                                 * trans_size];
715                 eeee[0] =
716                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
717                                                 + g_ai2_ihevc_trans_32[16][0]
718                                                                 * pi2_tmp[16
719                                                                                 * trans_size];
720                 eeee[1] =
721                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
722                                                 + g_ai2_ihevc_trans_32[16][1]
723                                                                 * pi2_tmp[16
724                                                                                 * trans_size];
725 
726                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
727                 eee[0] = eeee[0] + eeeo[0];
728                 eee[3] = eeee[0] - eeeo[0];
729                 eee[1] = eeee[1] + eeeo[1];
730                 eee[2] = eeee[1] - eeeo[1];
731                 for(k = 0; k < 4; k++)
732                 {
733                     ee[k] = eee[k] + eeo[k];
734                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
735                 }
736                 for(k = 0; k < 8; k++)
737                 {
738                     e[k] = ee[k] + eo[k];
739                     e[k + 8] = ee[7 - k] - eo[7 - k];
740                 }
741                 for(k = 0; k < 16; k++)
742                 {
743                     WORD32 itrans_out;
744                     itrans_out =
745                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
746                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
747                     itrans_out =
748                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
749                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
750                 }
751                 pi2_tmp++;
752                 pu1_pred += pred_strd;
753                 pu1_dst += dst_strd;
754             }
755         }
756         /************************************************************************************************/
757         /************************************END - IT_RECON_32x32****************************************/
758         /************************************************************************************************/
759     }
760     else  /* All rows of input are non-zero */
761     {
762         /************************************************************************************************/
763         /**********************************START - IT_RECON_32x32****************************************/
764         /************************************************************************************************/
765         /* Inverse Transform 1st stage */
766         shift = IT_SHIFT_STAGE_1;
767         add = 1 << (shift - 1);
768 
769         for(j = 0; j < row_limit_2nd_stage; j++)
770         {
771             /* Checking for Zero Cols */
772             if((zero_cols & 1) == 1)
773             {
774                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
775             }
776             else
777             {
778                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
779                 for(k = 0; k < 16; k++)
780                 {
781                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
782                                     + g_ai2_ihevc_trans_32[3][k]
783                                                     * pi2_src[3 * src_strd]
784                                     + g_ai2_ihevc_trans_32[5][k]
785                                                     * pi2_src[5 * src_strd]
786                                     + g_ai2_ihevc_trans_32[7][k]
787                                                     * pi2_src[7 * src_strd]
788                                     + g_ai2_ihevc_trans_32[9][k]
789                                                     * pi2_src[9 * src_strd]
790                                     + g_ai2_ihevc_trans_32[11][k]
791                                                     * pi2_src[11 * src_strd]
792                                     + g_ai2_ihevc_trans_32[13][k]
793                                                     * pi2_src[13 * src_strd]
794                                     + g_ai2_ihevc_trans_32[15][k]
795                                                     * pi2_src[15 * src_strd]
796                                     + g_ai2_ihevc_trans_32[17][k]
797                                                     * pi2_src[17 * src_strd]
798                                     + g_ai2_ihevc_trans_32[19][k]
799                                                     * pi2_src[19 * src_strd]
800                                     + g_ai2_ihevc_trans_32[21][k]
801                                                     * pi2_src[21 * src_strd]
802                                     + g_ai2_ihevc_trans_32[23][k]
803                                                     * pi2_src[23 * src_strd]
804                                     + g_ai2_ihevc_trans_32[25][k]
805                                                     * pi2_src[25 * src_strd]
806                                     + g_ai2_ihevc_trans_32[27][k]
807                                                     * pi2_src[27 * src_strd]
808                                     + g_ai2_ihevc_trans_32[29][k]
809                                                     * pi2_src[29 * src_strd]
810                                     + g_ai2_ihevc_trans_32[31][k]
811                                                     * pi2_src[31 * src_strd];
812                 }
813                 for(k = 0; k < 8; k++)
814                 {
815                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
816                                     + g_ai2_ihevc_trans_32[6][k]
817                                                     * pi2_src[6 * src_strd]
818                                     + g_ai2_ihevc_trans_32[10][k]
819                                                     * pi2_src[10 * src_strd]
820                                     + g_ai2_ihevc_trans_32[14][k]
821                                                     * pi2_src[14 * src_strd]
822                                     + g_ai2_ihevc_trans_32[18][k]
823                                                     * pi2_src[18 * src_strd]
824                                     + g_ai2_ihevc_trans_32[22][k]
825                                                     * pi2_src[22 * src_strd]
826                                     + g_ai2_ihevc_trans_32[26][k]
827                                                     * pi2_src[26 * src_strd]
828                                     + g_ai2_ihevc_trans_32[30][k]
829                                                     * pi2_src[30 * src_strd];
830                 }
831                 for(k = 0; k < 4; k++)
832                 {
833                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]
834                                     + g_ai2_ihevc_trans_32[12][k]
835                                                     * pi2_src[12 * src_strd]
836                                     + g_ai2_ihevc_trans_32[20][k]
837                                                     * pi2_src[20 * src_strd]
838                                     + g_ai2_ihevc_trans_32[28][k]
839                                                     * pi2_src[28 * src_strd];
840                 }
841                 eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd]
842                                 + g_ai2_ihevc_trans_32[24][0]
843                                                 * pi2_src[24 * src_strd];
844                 eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd]
845                                 + g_ai2_ihevc_trans_32[24][1]
846                                                 * pi2_src[24 * src_strd];
847                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]
848                                 + g_ai2_ihevc_trans_32[16][0]
849                                                 * pi2_src[16 * src_strd];
850                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]
851                                 + g_ai2_ihevc_trans_32[16][1]
852                                                 * pi2_src[16 * src_strd];
853 
854                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
855                 eee[0] = eeee[0] + eeeo[0];
856                 eee[3] = eeee[0] - eeeo[0];
857                 eee[1] = eeee[1] + eeeo[1];
858                 eee[2] = eeee[1] - eeeo[1];
859                 for(k = 0; k < 4; k++)
860                 {
861                     ee[k] = eee[k] + eeo[k];
862                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
863                 }
864                 for(k = 0; k < 8; k++)
865                 {
866                     e[k] = ee[k] + eo[k];
867                     e[k + 8] = ee[7 - k] - eo[7 - k];
868                 }
869                 for(k = 0; k < 16; k++)
870                 {
871                     pi2_tmp[k] =
872                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
873                     pi2_tmp[k + 16] =
874                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
875                 }
876             }
877             pi2_src++;
878             pi2_tmp += trans_size;
879             zero_cols = zero_cols >> 1;
880         }
881 
882         pi2_tmp = pi2_tmp_orig;
883 
884         /* Inverse Transform 2nd stage */
885         shift = IT_SHIFT_STAGE_2;
886         add = 1 << (shift - 1);
887         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
888         {
889             for(j = 0; j < trans_size; j++)
890             {
891                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
892                 for(k = 0; k < 16; k++)
893                 {
894                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
895                                     + g_ai2_ihevc_trans_32[3][k]
896                                                     * pi2_tmp[3 * trans_size];
897                 }
898                 for(k = 0; k < 8; k++)
899                 {
900                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
901                 }
902 //                for(k = 0; k < 4; k++)
903                 {
904                     eeo[0] = 0;
905                     eeo[1] = 0;
906                     eeo[2] = 0;
907                     eeo[3] = 0;
908                 }
909                 eeeo[0] = 0;
910                 eeeo[1] = 0;
911                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
912                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
913 
914                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
915                 eee[0] = eeee[0] + eeeo[0];
916                 eee[3] = eeee[0] - eeeo[0];
917                 eee[1] = eeee[1] + eeeo[1];
918                 eee[2] = eeee[1] - eeeo[1];
919                 for(k = 0; k < 4; k++)
920                 {
921                     ee[k] = eee[k] + eeo[k];
922                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
923                 }
924                 for(k = 0; k < 8; k++)
925                 {
926                     e[k] = ee[k] + eo[k];
927                     e[k + 8] = ee[7 - k] - eo[7 - k];
928                 }
929                 for(k = 0; k < 16; k++)
930                 {
931                     WORD32 itrans_out;
932                     itrans_out =
933                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
934                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
935                     itrans_out =
936                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
937                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
938                 }
939                 pi2_tmp++;
940                 pu1_pred += pred_strd;
941                 pu1_dst += dst_strd;
942             }
943         }
944         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
945         {
946             for(j = 0; j < trans_size; j++)
947             {
948                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
949                 for(k = 0; k < 16; k++)
950                 {
951                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
952                                     + g_ai2_ihevc_trans_32[3][k]
953                                                     * pi2_tmp[3 * trans_size]
954                                     + g_ai2_ihevc_trans_32[5][k]
955                                                     * pi2_tmp[5 * trans_size]
956                                     + g_ai2_ihevc_trans_32[7][k]
957                                                     * pi2_tmp[7 * trans_size];
958                 }
959                 for(k = 0; k < 8; k++)
960                 {
961                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
962                                     + g_ai2_ihevc_trans_32[6][k]
963                                                     * pi2_tmp[6 * trans_size];
964                 }
965                 for(k = 0; k < 4; k++)
966                 {
967                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
968                 }
969                 eeeo[0] = 0;
970                 eeeo[1] = 0;
971                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
972                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
973 
974                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
975                 eee[0] = eeee[0] + eeeo[0];
976                 eee[3] = eeee[0] - eeeo[0];
977                 eee[1] = eeee[1] + eeeo[1];
978                 eee[2] = eeee[1] - eeeo[1];
979                 for(k = 0; k < 4; k++)
980                 {
981                     ee[k] = eee[k] + eeo[k];
982                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
983                 }
984                 for(k = 0; k < 8; k++)
985                 {
986                     e[k] = ee[k] + eo[k];
987                     e[k + 8] = ee[7 - k] - eo[7 - k];
988                 }
989                 for(k = 0; k < 16; k++)
990                 {
991                     WORD32 itrans_out;
992                     itrans_out =
993                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
994                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
995                     itrans_out =
996                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
997                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
998                 }
999                 pi2_tmp++;
1000                 pu1_pred += pred_strd;
1001                 pu1_dst += dst_strd;
1002             }
1003         }
1004         else /* All rows of output of 1st stage are non-zero */
1005         {
1006             for(j = 0; j < trans_size; j++)
1007             {
1008                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1009                 for(k = 0; k < 16; k++)
1010                 {
1011                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
1012                                     + g_ai2_ihevc_trans_32[3][k]
1013                                                     * pi2_tmp[3 * trans_size]
1014                                     + g_ai2_ihevc_trans_32[5][k]
1015                                                     * pi2_tmp[5 * trans_size]
1016                                     + g_ai2_ihevc_trans_32[7][k]
1017                                                     * pi2_tmp[7 * trans_size]
1018                                     + g_ai2_ihevc_trans_32[9][k]
1019                                                     * pi2_tmp[9 * trans_size]
1020                                     + g_ai2_ihevc_trans_32[11][k]
1021                                                     * pi2_tmp[11 * trans_size]
1022                                     + g_ai2_ihevc_trans_32[13][k]
1023                                                     * pi2_tmp[13 * trans_size]
1024                                     + g_ai2_ihevc_trans_32[15][k]
1025                                                     * pi2_tmp[15 * trans_size]
1026                                     + g_ai2_ihevc_trans_32[17][k]
1027                                                     * pi2_tmp[17 * trans_size]
1028                                     + g_ai2_ihevc_trans_32[19][k]
1029                                                     * pi2_tmp[19 * trans_size]
1030                                     + g_ai2_ihevc_trans_32[21][k]
1031                                                     * pi2_tmp[21 * trans_size]
1032                                     + g_ai2_ihevc_trans_32[23][k]
1033                                                     * pi2_tmp[23 * trans_size]
1034                                     + g_ai2_ihevc_trans_32[25][k]
1035                                                     * pi2_tmp[25 * trans_size]
1036                                     + g_ai2_ihevc_trans_32[27][k]
1037                                                     * pi2_tmp[27 * trans_size]
1038                                     + g_ai2_ihevc_trans_32[29][k]
1039                                                     * pi2_tmp[29 * trans_size]
1040                                     + g_ai2_ihevc_trans_32[31][k]
1041                                                     * pi2_tmp[31 * trans_size];
1042                 }
1043                 for(k = 0; k < 8; k++)
1044                 {
1045                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
1046                                     + g_ai2_ihevc_trans_32[6][k]
1047                                                     * pi2_tmp[6 * trans_size]
1048                                     + g_ai2_ihevc_trans_32[10][k]
1049                                                     * pi2_tmp[10 * trans_size]
1050                                     + g_ai2_ihevc_trans_32[14][k]
1051                                                     * pi2_tmp[14 * trans_size]
1052                                     + g_ai2_ihevc_trans_32[18][k]
1053                                                     * pi2_tmp[18 * trans_size]
1054                                     + g_ai2_ihevc_trans_32[22][k]
1055                                                     * pi2_tmp[22 * trans_size]
1056                                     + g_ai2_ihevc_trans_32[26][k]
1057                                                     * pi2_tmp[26 * trans_size]
1058                                     + g_ai2_ihevc_trans_32[30][k]
1059                                                     * pi2_tmp[30 * trans_size];
1060                 }
1061                 for(k = 0; k < 4; k++)
1062                 {
1063                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
1064                                     + g_ai2_ihevc_trans_32[12][k]
1065                                                     * pi2_tmp[12 * trans_size]
1066                                     + g_ai2_ihevc_trans_32[20][k]
1067                                                     * pi2_tmp[20 * trans_size]
1068                                     + g_ai2_ihevc_trans_32[28][k]
1069                                                     * pi2_tmp[28 * trans_size];
1070                 }
1071                 eeeo[0] =
1072                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
1073                                                 + g_ai2_ihevc_trans_32[24][0]
1074                                                                 * pi2_tmp[24
1075                                                                                 * trans_size];
1076                 eeeo[1] =
1077                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
1078                                                 + g_ai2_ihevc_trans_32[24][1]
1079                                                                 * pi2_tmp[24
1080                                                                                 * trans_size];
1081                 eeee[0] =
1082                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
1083                                                 + g_ai2_ihevc_trans_32[16][0]
1084                                                                 * pi2_tmp[16
1085                                                                                 * trans_size];
1086                 eeee[1] =
1087                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
1088                                                 + g_ai2_ihevc_trans_32[16][1]
1089                                                                 * pi2_tmp[16
1090                                                                                 * trans_size];
1091 
1092                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
1093                 eee[0] = eeee[0] + eeeo[0];
1094                 eee[3] = eeee[0] - eeeo[0];
1095                 eee[1] = eeee[1] + eeeo[1];
1096                 eee[2] = eeee[1] - eeeo[1];
1097                 for(k = 0; k < 4; k++)
1098                 {
1099                     ee[k] = eee[k] + eeo[k];
1100                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
1101                 }
1102                 for(k = 0; k < 8; k++)
1103                 {
1104                     e[k] = ee[k] + eo[k];
1105                     e[k + 8] = ee[7 - k] - eo[7 - k];
1106                 }
1107                 for(k = 0; k < 16; k++)
1108                 {
1109                     WORD32 itrans_out;
1110                     itrans_out =
1111                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
1112                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
1113                     itrans_out =
1114                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
1115                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
1116                 }
1117                 pi2_tmp++;
1118                 pu1_pred += pred_strd;
1119                 pu1_dst += dst_strd;
1120             }
1121         }
1122         /************************************************************************************************/
1123         /************************************END - IT_RECON_32x32****************************************/
1124         /************************************************************************************************/
1125     }
1126 }
1127 
1128