1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /*
19 ------------------------------------------------------------------------------
20 MODULE DESCRIPTION
21
22 This file contains the functions that transform an 8r8 image block from
23 dequantized DCT coefficients to spatial domain pirel values by calculating
24 inverse discrete cosine transform (IDCT).
25
26 ------------------------------------------------------------------------------
27 */
28 /*----------------------------------------------------------------------------
29 ; INCLUDES
30 ----------------------------------------------------------------------------*/
31 #include "mp4dec_lib.h"
32 #include "idct.h"
33 #include "motion_comp.h"
34 #ifndef FAST_IDCT
35
36 /*
37 ------------------------------------------------------------------------------
38 FUNCTION NAME: idct
39 ------------------------------------------------------------------------------
40 INPUT AND OUTPUT DEFINITIONS FOR idct
41
42 Inputs:
43 blk = pointer to the buffer containing the dequantized DCT
44 coefficients of type int for an 8r8 image block;
45 values range from (-2048, 2047) which defined as standard.
46
47 Local Stores/Buffers/Pointers Needed:
48 None
49
50 Global Stores/Buffers/Pointers Needed:
51 None
52
53 Outputs:
54 None
55
56 Pointers and Buffers Modified:
57 blk points to the found IDCT values for an 8r8 image block.
58
59 Local Stores Modified:
60 None
61
62 Global Stores Modified:
63 None
64
65 ------------------------------------------------------------------------------
66 FUNCTION DESCRIPTION FOR idct
67
68 This function transforms an 8r8 image block from dequantized DCT coefficients
69 (F(u,v)) to spatial domain pirel values (f(r,y)) by performing the two
70 dimensional inverse discrete cosine transform (IDCT).
71
72 _7_ _7_ C(u) C(v)
73 f(r,y) = \ \ F(u,v)---- ----cos[(2r+1)*u*pi/16]cos[(2y+1)*v*pi/16]
74 /__ /__ 2 2
75 u=0 v=0
76
77 where C(i) = 1/sqrt(2) if i=0
78 C(i) = 1 otherwise
79
80 2-D IDCT can be separated as horizontal(row-wise) and vertical(column-wise)
81 1-D IDCTs. Therefore, 2-D IDCT values are found by the following two steps:
82 1. Find horizontal 1-D IDCT values for each row from 8r8 dequantized DCT
83 coefficients by row IDCT operation.
84
85 _7_ C(u)
86 g(r,v) = \ F(u,v) ---- cos[(2r+1)*u*pi/16]
87 /__ 2
88 u=0
89
90 2. Find vertical 1-D IDCT values for each column from the results of 1
91 by column IDCT operation.
92
93 _7_ C(v)
94 f(r,y) = \ g(r,v) ---- cos[(2y+1)*v*pi/16]
95 /__ 2
96 v=0
97
98 ------------------------------------------------------------------------------
99 REQUIREMENTS FOR idct
100
101 None
102
103 ------------------------------------------------------------------------------
104 */
105 /* REFERENCES FOR idct */
106 /* idct.c, inverse fast discrete cosine transform
107 inverse two dimensional DCT, Chen-Wang algorithm
108 (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)
109 32-bit integer arithmetic (8 bit coefficients)
110 11 mults, 29 adds per DCT
111 sE, 18.8.91
112
113 coefficients ertended to 12 bit for IEEE1180-1990
114 compliance sE, 2.1.94
115 */
116
117
118 /*----------------------------------------------------------------------------
119 ; Function Code FOR idct
120 ----------------------------------------------------------------------------*/
idct_intra(int * blk,uint8 * comp,int width)121 void idct_intra(
122 int *blk, uint8 *comp, int width
123 )
124 {
125 /*----------------------------------------------------------------------------
126 ; Define all local variables
127 ----------------------------------------------------------------------------*/
128 int i;
129 int32 tmpBLK[64];
130 int32 *tmpBLK32 = &tmpBLK[0];
131 int32 r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
132 int32 a;
133 int offset = width - 8;
134 /*----------------------------------------------------------------------------
135 ; Function body here
136 ----------------------------------------------------------------------------*/
137 /* two dimensional inverse discrete cosine transform */
138
139
140 /* column (vertical) IDCT */
141 for (i = B_SIZE - 1; i >= 0; i--)
142 {
143 /* initialize butterfly nodes at first stage */
144
145 r1 = blk[B_SIZE * 4 + i] << 11;
146 /* since row IDCT results have net left shift by 3 */
147 /* this left shift by 8 gives net left shift by 11 */
148 /* in order to maintain the same scale as that of */
149 /* coefficients Wi */
150
151 r2 = blk[B_SIZE * 6 + i];
152 r3 = blk[B_SIZE * 2 + i];
153 r4 = blk[B_SIZE * 1 + i];
154 r5 = blk[B_SIZE * 7 + i];
155 r6 = blk[B_SIZE * 5 + i];
156 r7 = blk[B_SIZE * 3 + i];
157
158 if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
159 {
160 /* shortcut */
161 /* execute if values of g(r,1) to g(r,7) in a column*/
162 /* are all zeros */
163
164 /* make output of IDCT >>3 or scaled by 1/8 and */
165 /* with the proper rounding */
166 a = (blk[B_SIZE * 0 + i]) << 3;
167 tmpBLK32[B_SIZE * 0 + i] = a;
168 tmpBLK32[B_SIZE * 1 + i] = a;
169 tmpBLK32[B_SIZE * 2 + i] = a;
170 tmpBLK32[B_SIZE * 3 + i] = a;
171 tmpBLK32[B_SIZE * 4 + i] = a;
172 tmpBLK32[B_SIZE * 5 + i] = a;
173 tmpBLK32[B_SIZE * 6 + i] = a;
174 tmpBLK32[B_SIZE * 7 + i] = a;
175 }
176 else
177 {
178 r0 = (blk[8 * 0 + i] << 11) + 128;
179
180 /* first stage */
181
182 r8 = W7 * (r4 + r5);
183 r4 = (r8 + (W1 - W7) * r4);
184 /* Multiplication with Wi increases the net left */
185 /* shift from 11 to 14,we have to shift back by 3*/
186 r5 = (r8 - (W1 + W7) * r5);
187 r8 = W3 * (r6 + r7);
188 r6 = (r8 - (W3 - W5) * r6);
189 r7 = (r8 - (W3 + W5) * r7);
190
191 /* second stage */
192 r8 = r0 + r1;
193 r0 -= r1;
194
195 r1 = W6 * (r3 + r2);
196 r2 = (r1 - (W2 + W6) * r2);
197 r3 = (r1 + (W2 - W6) * r3);
198
199 r1 = r4 + r6;
200 r4 -= r6;
201 r6 = r5 + r7;
202 r5 -= r7;
203
204 /* third stage */
205 r7 = r8 + r3;
206 r8 -= r3;
207 r3 = r0 + r2;
208 r0 -= r2;
209 r2 = (181 * (r4 + r5) + 128) >> 8; /* rounding */
210 r4 = (181 * (r4 - r5) + 128) >> 8;
211
212 /* fourth stage */
213 /* net shift of IDCT is >>3 after the following */
214 /* shift operation, it makes output of 2-D IDCT */
215 /* scaled by 1/8, that is scaled twice by */
216 /* 1/(2*sqrt(2)) for row IDCT and column IDCT. */
217 /* see detail analysis in design doc. */
218 tmpBLK32[0 + i] = (r7 + r1) >> 8;
219 tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
220 tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
221 tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
222 tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
223 tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
224 tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
225 tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
226 }
227 }
228 /* row (horizontal) IDCT */
229 for (i = 0 ; i < B_SIZE; i++)
230 {
231 /* initialize butterfly nodes at the first stage */
232
233 r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
234 /* r1 left shift by 11 is to maintain the same */
235 /* scale as that of coefficients (W1,...W7) */
236 /* since blk[4] won't multiply with Wi. */
237 /* see detail diagram in design document. */
238
239 r2 = tmpBLK32[6+(i<<3)];
240 r3 = tmpBLK32[2+(i<<3)];
241 r4 = tmpBLK32[1+(i<<3)];
242 r5 = tmpBLK32[7+(i<<3)];
243 r6 = tmpBLK32[5+(i<<3)];
244 r7 = tmpBLK32[3+(i<<3)];
245
246 if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
247 {
248 /* shortcut */
249 /* execute if values of F(1,v) to F(7,v) in a row*/
250 /* are all zeros */
251
252 /* output of row IDCT scaled by 8 */
253 a = (((int32)tmpBLK32[0+(i<<3)] + 32) >> 6);
254 CLIP_RESULT(a)
255 *comp++ = a;
256 *comp++ = a;
257 *comp++ = a;
258 *comp++ = a;
259 *comp++ = a;
260 *comp++ = a;
261 *comp++ = a;
262 *comp++ = a;
263
264 comp += offset;
265 }
266
267 else
268 {
269 /* for proper rounding in the fourth stage */
270 r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
271
272 /* first stage */
273
274 r8 = W7 * (r4 + r5) + 4;
275 r4 = (r8 + (W1 - W7) * r4) >> 3;
276 r5 = (r8 - (W1 + W7) * r5) >> 3;
277
278 r8 = W3 * (r6 + r7) + 4;
279 r6 = (r8 - (W3 - W5) * r6) >> 3;
280 r7 = (r8 - (W3 + W5) * r7) >> 3;
281
282 /* second stage */
283 r8 = r0 + r1;
284 r0 -= r1;
285
286 r1 = W6 * (r3 + r2) + 4;
287 r2 = (r1 - (W2 + W6) * r2) >> 3;
288 r3 = (r1 + (W2 - W6) * r3) >> 3;
289
290 r1 = r4 + r6;
291 r4 -= r6;
292 r6 = r5 + r7;
293 r5 -= r7;
294
295 /* third stage */
296 r7 = r8 + r3;
297 r8 -= r3;
298 r3 = r0 + r2;
299 r0 -= r2;
300 r2 = (181 * (r4 + r5) + 128) >> 8; /* rounding */
301 r4 = (181 * (r4 - r5) + 128) >> 8;
302
303 /* fourth stage */
304 /* net shift of this function is <<3 after the */
305 /* following shift operation, it makes output of */
306 /* row IDCT scaled by 8 to retain 3 bits precision*/
307 a = ((r7 + r1) >> 14);
308 CLIP_RESULT(a)
309 *comp++ = a;
310 a = ((r3 + r2) >> 14);
311 CLIP_RESULT(a)
312 *comp++ = a;
313 a = ((r0 + r4) >> 14);
314 CLIP_RESULT(a)
315 *comp++ = a;
316 a = ((r8 + r6) >> 14);
317 CLIP_RESULT(a)
318 *comp++ = a;
319 a = ((r8 - r6) >> 14);
320 CLIP_RESULT(a)
321 *comp++ = a;
322 a = ((r0 - r4) >> 14);
323 CLIP_RESULT(a)
324 *comp++ = a;
325 a = ((r3 - r2) >> 14);
326 CLIP_RESULT(a)
327 *comp++ = a;
328 a = ((r7 - r1) >> 14);
329 CLIP_RESULT(a)
330 *comp++ = a;
331
332 comp += offset;
333 }
334 }
335
336
337
338 /*----------------------------------------------------------------------------
339 ; Return nothing or data or data pointer
340 ----------------------------------------------------------------------------*/
341 return;
342 }
343
idct(int * blk,uint8 * pred,uint8 * dst,int width)344 void idct(
345 int *blk, uint8 *pred, uint8 *dst, int width)
346 {
347 /*----------------------------------------------------------------------------
348 ; Define all local variables
349 ----------------------------------------------------------------------------*/
350 int i;
351 int32 tmpBLK[64];
352 int32 *tmpBLK32 = &tmpBLK[0];
353 int32 r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
354 int32 a;
355 int res;
356
357 /*----------------------------------------------------------------------------
358 ; Function body here
359 ----------------------------------------------------------------------------*/
360 /* two dimensional inverse discrete cosine transform */
361
362
363 /* column (vertical) IDCT */
364 for (i = B_SIZE - 1; i >= 0; i--)
365 {
366 /* initialize butterfly nodes at first stage */
367
368 r1 = blk[B_SIZE * 4 + i] << 11;
369 /* since row IDCT results have net left shift by 3 */
370 /* this left shift by 8 gives net left shift by 11 */
371 /* in order to maintain the same scale as that of */
372 /* coefficients Wi */
373
374 r2 = blk[B_SIZE * 6 + i];
375 r3 = blk[B_SIZE * 2 + i];
376 r4 = blk[B_SIZE * 1 + i];
377 r5 = blk[B_SIZE * 7 + i];
378 r6 = blk[B_SIZE * 5 + i];
379 r7 = blk[B_SIZE * 3 + i];
380
381 if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
382 {
383 /* shortcut */
384 /* execute if values of g(r,1) to g(r,7) in a column*/
385 /* are all zeros */
386
387 /* make output of IDCT >>3 or scaled by 1/8 and */
388 /* with the proper rounding */
389 a = (blk[B_SIZE * 0 + i]) << 3;
390 tmpBLK32[B_SIZE * 0 + i] = a;
391 tmpBLK32[B_SIZE * 1 + i] = a;
392 tmpBLK32[B_SIZE * 2 + i] = a;
393 tmpBLK32[B_SIZE * 3 + i] = a;
394 tmpBLK32[B_SIZE * 4 + i] = a;
395 tmpBLK32[B_SIZE * 5 + i] = a;
396 tmpBLK32[B_SIZE * 6 + i] = a;
397 tmpBLK32[B_SIZE * 7 + i] = a;
398 }
399 else
400 {
401 r0 = (blk[8 * 0 + i] << 11) + 128;
402
403 /* first stage */
404
405 r8 = W7 * (r4 + r5);
406 r4 = (r8 + (W1 - W7) * r4);
407 /* Multiplication with Wi increases the net left */
408 /* shift from 11 to 14,we have to shift back by 3*/
409 r5 = (r8 - (W1 + W7) * r5);
410 r8 = W3 * (r6 + r7);
411 r6 = (r8 - (W3 - W5) * r6);
412 r7 = (r8 - (W3 + W5) * r7);
413
414 /* second stage */
415 r8 = r0 + r1;
416 r0 -= r1;
417
418 r1 = W6 * (r3 + r2);
419 r2 = (r1 - (W2 + W6) * r2);
420 r3 = (r1 + (W2 - W6) * r3);
421
422 r1 = r4 + r6;
423 r4 -= r6;
424 r6 = r5 + r7;
425 r5 -= r7;
426
427 /* third stage */
428 r7 = r8 + r3;
429 r8 -= r3;
430 r3 = r0 + r2;
431 r0 -= r2;
432 r2 = (181 * (r4 + r5) + 128) >> 8; /* rounding */
433 r4 = (181 * (r4 - r5) + 128) >> 8;
434
435 /* fourth stage */
436 /* net shift of IDCT is >>3 after the following */
437 /* shift operation, it makes output of 2-D IDCT */
438 /* scaled by 1/8, that is scaled twice by */
439 /* 1/(2*sqrt(2)) for row IDCT and column IDCT. */
440 /* see detail analysis in design doc. */
441 tmpBLK32[0 + i] = (r7 + r1) >> 8;
442 tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
443 tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
444 tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
445 tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
446 tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
447 tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
448 tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
449 }
450 }
451 /* row (horizontal) IDCT */
452 for (i = B_SIZE - 1; i >= 0; i--)
453 {
454 /* initialize butterfly nodes at the first stage */
455
456 r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
457 /* r1 left shift by 11 is to maintain the same */
458 /* scale as that of coefficients (W1,...W7) */
459 /* since blk[4] won't multiply with Wi. */
460 /* see detail diagram in design document. */
461
462 r2 = tmpBLK32[6+(i<<3)];
463 r3 = tmpBLK32[2+(i<<3)];
464 r4 = tmpBLK32[1+(i<<3)];
465 r5 = tmpBLK32[7+(i<<3)];
466 r6 = tmpBLK32[5+(i<<3)];
467 r7 = tmpBLK32[3+(i<<3)];
468
469 if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
470 {
471 /* shortcut */
472 /* execute if values of F(1,v) to F(7,v) in a row*/
473 /* are all zeros */
474
475 /* output of row IDCT scaled by 8 */
476 a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
477 blk[0+(i<<3)] = a;
478 blk[1+(i<<3)] = a;
479 blk[2+(i<<3)] = a;
480 blk[3+(i<<3)] = a;
481 blk[4+(i<<3)] = a;
482 blk[5+(i<<3)] = a;
483 blk[6+(i<<3)] = a;
484 blk[7+(i<<3)] = a;
485
486 }
487
488 else
489 {
490 /* for proper rounding in the fourth stage */
491 r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
492
493 /* first stage */
494
495 r8 = W7 * (r4 + r5) + 4;
496 r4 = (r8 + (W1 - W7) * r4) >> 3;
497 r5 = (r8 - (W1 + W7) * r5) >> 3;
498
499 r8 = W3 * (r6 + r7) + 4;
500 r6 = (r8 - (W3 - W5) * r6) >> 3;
501 r7 = (r8 - (W3 + W5) * r7) >> 3;
502
503 /* second stage */
504 r8 = r0 + r1;
505 r0 -= r1;
506
507 r1 = W6 * (r3 + r2) + 4;
508 r2 = (r1 - (W2 + W6) * r2) >> 3;
509 r3 = (r1 + (W2 - W6) * r3) >> 3;
510
511 r1 = r4 + r6;
512 r4 -= r6;
513 r6 = r5 + r7;
514 r5 -= r7;
515
516 /* third stage */
517 r7 = r8 + r3;
518 r8 -= r3;
519 r3 = r0 + r2;
520 r0 -= r2;
521 r2 = (181 * (r4 + r5) + 128) >> 8; /* rounding */
522 r4 = (181 * (r4 - r5) + 128) >> 8;
523
524 /* fourth stage */
525 /* net shift of this function is <<3 after the */
526 /* following shift operation, it makes output of */
527 /* row IDCT scaled by 8 to retain 3 bits precision*/
528 blk[0+(i<<3)] = (r7 + r1) >> 14;
529 blk[1+(i<<3)] = (r3 + r2) >> 14;
530 blk[2+(i<<3)] = (r0 + r4) >> 14;
531 blk[3+(i<<3)] = (r8 + r6) >> 14;
532 blk[4+(i<<3)] = (r8 - r6) >> 14;
533 blk[5+(i<<3)] = (r0 - r4) >> 14;
534 blk[6+(i<<3)] = (r3 - r2) >> 14;
535 blk[7+(i<<3)] = (r7 - r1) >> 14;
536 }
537 /* add with prediction , 08/03/05 */
538 res = (*pred++ + block[0+(i<<3)]);
539 CLIP_RESULT(res);
540 *dst++ = res;
541 res = (*pred++ + block[1+(i<<3)]);
542 CLIP_RESULT(res);
543 *dst++ = res;
544 res = (*pred++ + block[2+(i<<3)]);
545 CLIP_RESULT(res);
546 *dst++ = res;
547 res = (*pred++ + block[3+(i<<3)]);
548 CLIP_RESULT(res);
549 *dst++ = res;
550 res = (*pred++ + block[4+(i<<3)]);
551 CLIP_RESULT(res);
552 *dst++ = res;
553 res = (*pred++ + block[5+(i<<3)]);
554 CLIP_RESULT(res);
555 *dst++ = res;
556 res = (*pred++ + block[6+(i<<3)]);
557 CLIP_RESULT(res);
558 *dst++ = res;
559 res = (*pred++ + block[7+(i<<3)]);
560 CLIP_RESULT(res);
561 *dst++ = res;
562
563 pred += 8;
564 dst += (width - 8);
565 }
566
567
568
569 /*----------------------------------------------------------------------------
570 ; Return nothing or data or data pointer
571 ----------------------------------------------------------------------------*/
572 return;
573 }
574
575 #endif
576 /*----------------------------------------------------------------------------
577 ; End Function: idct
578 ----------------------------------------------------------------------------*/
579
580