1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevc_itrans_recon_8x8.c
22 *
23 * @brief
24 * Contains function definitions for inverse transform and reconstruction 8x8
25 *
26 *
27 * @author
28 * 100470
29 *
30 * @par List of Functions:
31 * - ihevc_itrans_recon_8x8()
32 *
33 * @remarks
34 * None
35 *
36 *******************************************************************************
37 */
38 #include <stdio.h>
39 #include <string.h>
40 #include "ihevc_typedefs.h"
41 #include "ihevc_macros.h"
42 #include "ihevc_platform_macros.h"
43 #include "ihevc_defs.h"
44 #include "ihevc_trans_tables.h"
45 #include "ihevc_itrans_recon.h"
46 #include "ihevc_func_selector.h"
47 #include "ihevc_trans_macros.h"
48
49 /**
50 *******************************************************************************
51 *
52 * @brief
53 * This function performs Inverse transform and reconstruction for 8x8
54 * input block
55 *
56 * @par Description:
57 * Performs inverse transform and adds the prediction data and clips output
58 * to 8 bit
59 *
60 * @param[in] pi2_src
61 * Input 8x8 coefficients
62 *
63 * @param[in] pi2_tmp
64 * Temporary 8x8 buffer for storing inverse
65 *
66 * transform
67 * 1st stage output
68 *
69 * @param[in] pu1_pred
70 * Prediction 8x8 block
71 *
72 * @param[out] pu1_dst
73 * Output 8x8 block
74 *
75 * @param[in] src_strd
76 * Input stride
77 *
78 * @param[in] pred_strd
79 * Prediction stride
80 *
81 * @param[in] dst_strd
82 * Output Stride
83 *
84 * @param[in] shift
85 * Output shift
86 *
87 * @param[in] zero_cols
88 * Zero columns in pi2_src
89 *
90 * @returns Void
91 *
92 * @remarks
93 * None
94 *
95 *******************************************************************************
96 */
97
ihevc_itrans_recon_8x8(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)98 void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99 WORD16 *pi2_tmp,
100 UWORD8 *pu1_pred,
101 UWORD8 *pu1_dst,
102 WORD32 src_strd,
103 WORD32 pred_strd,
104 WORD32 dst_strd,
105 WORD32 zero_cols,
106 WORD32 zero_rows)
107 {
108 WORD32 j, k;
109 WORD32 e[4], o[4];
110 WORD32 ee[2], eo[2];
111 WORD32 add;
112 WORD32 shift;
113 WORD16 *pi2_tmp_orig;
114 WORD32 trans_size;
115 WORD32 zero_rows_2nd_stage = zero_cols;
116 WORD32 row_limit_2nd_stage;
117
118 trans_size = TRANS_SIZE_8;
119
120 pi2_tmp_orig = pi2_tmp;
121
122 if((zero_cols & 0xF0) == 0xF0)
123 row_limit_2nd_stage = 4;
124 else
125 row_limit_2nd_stage = TRANS_SIZE_8;
126
127
128 if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129 {
130 /************************************************************************************************/
131 /**********************************START - IT_RECON_8x8******************************************/
132 /************************************************************************************************/
133
134 /* Inverse Transform 1st stage */
135 shift = IT_SHIFT_STAGE_1;
136 add = 1 << (shift - 1);
137
138 for(j = 0; j < row_limit_2nd_stage; j++)
139 {
140 /* Checking for Zero Cols */
141 if((zero_cols & 1) == 1)
142 {
143 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144 }
145 else
146 {
147 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148 for(k = 0; k < 4; k++)
149 {
150 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151 + g_ai2_ihevc_trans_8[3][k]
152 * pi2_src[3 * src_strd];
153 }
154 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158
159 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160 e[0] = ee[0] + eo[0];
161 e[3] = ee[0] - eo[0];
162 e[1] = ee[1] + eo[1];
163 e[2] = ee[1] - eo[1];
164 for(k = 0; k < 4; k++)
165 {
166 pi2_tmp[k] =
167 CLIP_S16(((e[k] + o[k] + add) >> shift));
168 pi2_tmp[k + 4] =
169 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170 }
171 }
172 pi2_src++;
173 pi2_tmp += trans_size;
174 zero_cols = zero_cols >> 1;
175 }
176
177 pi2_tmp = pi2_tmp_orig;
178
179 /* Inverse Transform 2nd stage */
180 shift = IT_SHIFT_STAGE_2;
181 add = 1 << (shift - 1);
182 if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183 {
184 for(j = 0; j < trans_size; j++)
185 {
186 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187 for(k = 0; k < 4; k++)
188 {
189 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190 + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191 }
192 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196
197 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198 e[0] = ee[0] + eo[0];
199 e[3] = ee[0] - eo[0];
200 e[1] = ee[1] + eo[1];
201 e[2] = ee[1] - eo[1];
202 for(k = 0; k < 4; k++)
203 {
204 WORD32 itrans_out;
205 itrans_out =
206 CLIP_S16(((e[k] + o[k] + add) >> shift));
207 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208 itrans_out =
209 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211 }
212 pi2_tmp++;
213 pu1_pred += pred_strd;
214 pu1_dst += dst_strd;
215 }
216 }
217 else /* All rows of output of 1st stage are non-zero */
218 {
219 for(j = 0; j < trans_size; j++)
220 {
221 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222 for(k = 0; k < 4; k++)
223 {
224 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225 + g_ai2_ihevc_trans_8[3][k]
226 * pi2_tmp[3 * trans_size]
227 + g_ai2_ihevc_trans_8[5][k]
228 * pi2_tmp[5 * trans_size]
229 + g_ai2_ihevc_trans_8[7][k]
230 * pi2_tmp[7 * trans_size];
231 }
232
233 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241
242 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243 e[0] = ee[0] + eo[0];
244 e[3] = ee[0] - eo[0];
245 e[1] = ee[1] + eo[1];
246 e[2] = ee[1] - eo[1];
247 for(k = 0; k < 4; k++)
248 {
249 WORD32 itrans_out;
250 itrans_out =
251 CLIP_S16(((e[k] + o[k] + add) >> shift));
252 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253 itrans_out =
254 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256 }
257 pi2_tmp++;
258 pu1_pred += pred_strd;
259 pu1_dst += dst_strd;
260 }
261 }
262 /************************************************************************************************/
263 /************************************END - IT_RECON_8x8******************************************/
264 /************************************************************************************************/
265 }
266 else /* All rows of input are non-zero */
267 {
268 /************************************************************************************************/
269 /**********************************START - IT_RECON_8x8******************************************/
270 /************************************************************************************************/
271
272 /* Inverse Transform 1st stage */
273 shift = IT_SHIFT_STAGE_1;
274 add = 1 << (shift - 1);
275
276 for(j = 0; j < row_limit_2nd_stage; j++)
277 {
278 /* Checking for Zero Cols */
279 if((zero_cols & 1) == 1)
280 {
281 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282 }
283 else
284 {
285 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286 for(k = 0; k < 4; k++)
287 {
288 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289 + g_ai2_ihevc_trans_8[3][k]
290 * pi2_src[3 * src_strd]
291 + g_ai2_ihevc_trans_8[5][k]
292 * pi2_src[5 * src_strd]
293 + g_ai2_ihevc_trans_8[7][k]
294 * pi2_src[7 * src_strd];
295 }
296
297 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305
306 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307 e[0] = ee[0] + eo[0];
308 e[3] = ee[0] - eo[0];
309 e[1] = ee[1] + eo[1];
310 e[2] = ee[1] - eo[1];
311 for(k = 0; k < 4; k++)
312 {
313 pi2_tmp[k] =
314 CLIP_S16(((e[k] + o[k] + add) >> shift));
315 pi2_tmp[k + 4] =
316 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317 }
318 }
319 pi2_src++;
320 pi2_tmp += trans_size;
321 zero_cols = zero_cols >> 1;
322 }
323
324 pi2_tmp = pi2_tmp_orig;
325
326 /* Inverse Transform 2nd stage */
327 shift = IT_SHIFT_STAGE_2;
328 add = 1 << (shift - 1);
329 if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330 {
331 for(j = 0; j < trans_size; j++)
332 {
333 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334 for(k = 0; k < 4; k++)
335 {
336 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337 + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338 }
339 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343
344 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345 e[0] = ee[0] + eo[0];
346 e[3] = ee[0] - eo[0];
347 e[1] = ee[1] + eo[1];
348 e[2] = ee[1] - eo[1];
349 for(k = 0; k < 4; k++)
350 {
351 WORD32 itrans_out;
352 itrans_out =
353 CLIP_S16(((e[k] + o[k] + add) >> shift));
354 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355 itrans_out =
356 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358 }
359 pi2_tmp++;
360 pu1_pred += pred_strd;
361 pu1_dst += dst_strd;
362 }
363 }
364 else /* All rows of output of 1st stage are non-zero */
365 {
366 for(j = 0; j < trans_size; j++)
367 {
368 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369 for(k = 0; k < 4; k++)
370 {
371 o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372 + g_ai2_ihevc_trans_8[3][k]
373 * pi2_tmp[3 * trans_size]
374 + g_ai2_ihevc_trans_8[5][k]
375 * pi2_tmp[5 * trans_size]
376 + g_ai2_ihevc_trans_8[7][k]
377 * pi2_tmp[7 * trans_size];
378 }
379
380 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388
389 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390 e[0] = ee[0] + eo[0];
391 e[3] = ee[0] - eo[0];
392 e[1] = ee[1] + eo[1];
393 e[2] = ee[1] - eo[1];
394 for(k = 0; k < 4; k++)
395 {
396 WORD32 itrans_out;
397 itrans_out =
398 CLIP_S16(((e[k] + o[k] + add) >> shift));
399 pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400 itrans_out =
401 CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402 pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403 }
404 pi2_tmp++;
405 pu1_pred += pred_strd;
406 pu1_dst += dst_strd;
407 }
408 }
409 /************************************************************************************************/
410 /************************************END - IT_RECON_8x8******************************************/
411 /************************************************************************************************/
412 }
413 }
414
415