1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /*
19 ------------------------------------------------------------------------------
20 INPUT AND OUTPUT DEFINITIONS
21
22 Inputs:
23 [input_variable_name] = [description of the input to module, its type
24 definition, and length (when applicable)]
25
26 Local Stores/Buffers/Pointers Needed:
27 [local_store_name] = [description of the local store, its type
28 definition, and length (when applicable)]
29 [local_buffer_name] = [description of the local buffer, its type
30 definition, and length (when applicable)]
31 [local_ptr_name] = [description of the local pointer, its type
32 definition, and length (when applicable)]
33
34 Global Stores/Buffers/Pointers Needed:
35 [global_store_name] = [description of the global store, its type
36 definition, and length (when applicable)]
37 [global_buffer_name] = [description of the global buffer, its type
38 definition, and length (when applicable)]
39 [global_ptr_name] = [description of the global pointer, its type
40 definition, and length (when applicable)]
41
42 Outputs:
43 [return_variable_name] = [description of data/pointer returned
44 by module, its type definition, and length
45 (when applicable)]
46
47 Pointers and Buffers Modified:
48 [variable_bfr_ptr] points to the [describe where the
49 variable_bfr_ptr points to, its type definition, and length
50 (when applicable)]
51 [variable_bfr] contents are [describe the new contents of
52 variable_bfr]
53
54 Local Stores Modified:
55 [local_store_name] = [describe new contents, its type
56 definition, and length (when applicable)]
57
58 Global Stores Modified:
59 [global_store_name] = [describe new contents, its type
60 definition, and length (when applicable)]
61
62 ------------------------------------------------------------------------------
63 FUNCTION DESCRIPTION
64
65 ------------------------------------------------------------------------------
66 REQUIREMENTS
67
68 ------------------------------------------------------------------------------
69 REFERENCES
70
71 ------------------------------------------------------------------------------
72 PSEUDO-CODE
73
74 ------------------------------------------------------------------------------
75 RESOURCES USED
 When the code is written for a specific target processor, the
 resources used should be documented below.
78
79 STACK USAGE: [stack count for this module] + [variable to represent
80 stack usage for each subroutine called]
81
82 where: [stack usage variable] = stack usage for [subroutine
83 name] (see [filename].ext)
84
85 DATA MEMORY USED: x words
86
87 PROGRAM MEMORY USED: x words
88
89 CLOCK CYCLES: [cycle count equation for this module] + [variable
90 used to represent cycle count for each subroutine
91 called]
92
93 where: [cycle count variable] = cycle count for [subroutine
94 name] (see [filename].ext)
95
96 ------------------------------------------------------------------------------
97 */
98
99 /*----------------------------------------------------------------------------
100 ; INCLUDES
101 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
105
106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
107 /*----------------------------------------------------------------------------
108 ; MACROS
109 ; Define module specific macros here
110 ----------------------------------------------------------------------------*/
111
112 /*----------------------------------------------------------------------------
113 ; DEFINES
114 ; Include all pre-processor statements here. Include conditional
115 ; compile variables also.
116 ----------------------------------------------------------------------------*/
117
118 /*----------------------------------------------------------------------------
119 ; LOCAL FUNCTION DEFINITIONS
120 ; Function Prototype declaration
121 ----------------------------------------------------------------------------*/
122 /* private prototypes */
123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
125 static void idctcol(int16 *blk);
126
127 #ifdef FAST_IDCT
// Mapping from nz_coefs (and from the row/column bitmaps) to the IDCT
// functions to be used.
//
// Historical note: ARM4 does not allow global data unless it is constant,
// and an array of function pointers cannot be treated as an array of
// constants there (the actual addresses are only known when the DLL is
// loaded). The workaround described originally was to store arrays of
// row or column numbers and then call the IDCT function corresponding to
// that row/column number.
// NOTE(review): the tables below are declared as const arrays of function
// pointers, so the workaround described above appears to be out of date --
// confirm before relying on it.
137
138
139 static void (*const idctcolVCA[10][4])(int16*) =
140 {
141 {&idctcol1, &idctcol0, &idctcol0, &idctcol0},
142 {&idctcol1, &idctcol1, &idctcol0, &idctcol0},
143 {&idctcol2, &idctcol1, &idctcol0, &idctcol0},
144 {&idctcol3, &idctcol1, &idctcol0, &idctcol0},
145 {&idctcol3, &idctcol2, &idctcol0, &idctcol0},
146 {&idctcol3, &idctcol2, &idctcol1, &idctcol0},
147 {&idctcol3, &idctcol2, &idctcol1, &idctcol1},
148 {&idctcol3, &idctcol2, &idctcol2, &idctcol1},
149 {&idctcol3, &idctcol3, &idctcol2, &idctcol1},
150 {&idctcol4, &idctcol3, &idctcol2, &idctcol1}
151 };
152
153
154 static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
155 {
156 &idctrow1,
157 &idctrow2,
158 &idctrow2,
159 &idctrow2,
160 &idctrow2,
161 &idctrow3,
162 &idctrow4,
163 &idctrow4,
164 &idctrow4,
165 &idctrow4
166 };
167
168
169 static void (*const idctcolVCA2[16])(int16*) =
170 {
171 &idctcol0, &idctcol4, &idctcol3, &idctcol4,
172 &idctcol2, &idctcol4, &idctcol3, &idctcol4,
173 &idctcol1, &idctcol4, &idctcol3, &idctcol4,
174 &idctcol2, &idctcol4, &idctcol3, &idctcol4
175 };
176
177 static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
178 {
179 &idctrow1, &idctrow4, &idctrow3, &idctrow4,
180 &idctrow2, &idctrow4, &idctrow3, &idctrow4
181 };
182
183 static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
184 {
185 &idctrow1_intra,
186 &idctrow2_intra,
187 &idctrow2_intra,
188 &idctrow2_intra,
189 &idctrow2_intra,
190 &idctrow3_intra,
191 &idctrow4_intra,
192 &idctrow4_intra,
193 &idctrow4_intra,
194 &idctrow4_intra
195 };
196
197 static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
198 {
199 &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra,
200 &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra
201 };
202 #endif
203
204 /*----------------------------------------------------------------------------
205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
206 ; Variable declaration - defined here and used outside this module
207 ----------------------------------------------------------------------------*/
208
209 /*----------------------------------------------------------------------------
210 ; EXTERNAL FUNCTION REFERENCES
211 ; Declare functions defined elsewhere and referenced in this module
212 ----------------------------------------------------------------------------*/
213
214 /*----------------------------------------------------------------------------
215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
216 ; Declare variables used in this module but defined elsewhere
217 ----------------------------------------------------------------------------*/
218
219 /*----------------------------------------------------------------------------
220 ; FUNCTION CODE
221 ----------------------------------------------------------------------------*/
/*
 * MBlockIDCT: run BlockIDCT_intra on all six blocks of the current
 * macroblock and write the results into the current VOP.
 *
 *   video - decoder state; reads currVop, mblock, mbnum_col, mbnum_row and
 *           width.
 *
 * Blocks 0..3 go to the Y plane as a 2x2 arrangement of 8x8 blocks inside the
 * 16x16 macroblock; block 4 goes to the U plane and block 5 to the V plane,
 * both of which use a row pitch of width / 2.
 */
void MBlockIDCT(VideoDecData *video)
{
    MacroBlock *mblock = video->mblock;
    Vop *currVop = video->currVop;
    int mb_col = video->mbnum_col;
    int mb_row = video->mbnum_row;
    int width = video->width;
    int width_uv = width >> 1;
    int luma_blk;
    PIXEL *luma;
    PIXEL *chroma_u;
    PIXEL *chroma_v;

    /* Offset of the macroblock's top-left luma sample: 16 rows and 16
     * columns per macroblock. */
    int32 offset = (int32)(mb_row << 4) * width + (mb_col << 4);

    luma = currVop->yChan + offset;
    /* Chroma offset derived from the luma one: a quarter of it, plus four
     * more samples per macroblock column. */
    chroma_u = currVop->uChan + (offset >> 2) + (mb_col << 2);
    chroma_v = currVop->vChan + (offset >> 2) + (mb_col << 2);

    /* Luma: bit 1 of the block index selects the lower half (8 rows down),
     * bit 0 selects the right half (8 columns across). */
    for (luma_blk = 0; luma_blk < 4; luma_blk++)
    {
        PIXEL *target = luma;

        if (luma_blk & 2)
        {
            target = target + (width << 3);
        }
        if (luma_blk & 1)
        {
            target = target + 8;
        }
        BlockIDCT_intra(mblock, target, luma_blk, width);
    }

    BlockIDCT_intra(mblock, chroma_u, 4, width_uv);
    BlockIDCT_intra(mblock, chroma_v, 5, width_uv);
}
248
249
/*
 * BlockIDCT_intra: inverse-transform one 8x8 block of a macroblock and write
 * the pixels directly to the picture (no prediction is involved).
 *
 *   mblock - macroblock holding the coefficients (block[comp]) and, for the
 *            fast path, no_coeff[comp], bitmapcol[comp] and bitmaprow[comp].
 *   c_comp - top-left output pixel of the block.
 *   comp   - block index within the macroblock.
 *   width  - output row pitch.
 *
 * With INTEGER_IDCT and FAST_IDCT defined, the column and row routines are
 * chosen from the dispatch tables; otherwise the work is delegated to
 * idct_intra() or idctref_intra().
 *
 * Precondition (fast path): nz_coefs must be at least 1, because
 * nz_coefs - 1 is used directly as a table index.
 * NOTE(review): bitmaprow >> 4 can in principle reach 15 while
 * idctrowVCA2_intra has 8 entries; presumably the producer of bitmaprow
 * never sets its top bit -- confirm.
 */
void BlockIDCT_intra(
    MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
{
    int16 *coeff_in = mblock->block[comp];
#ifdef INTEGER_IDCT
#ifdef FAST_IDCT    /* VCA IDCT using nzcoefs and bitmaps */
    int nz_coefs = mblock->no_coeff[comp];
    uint8 *bitmapcol = mblock->bitmapcol[comp];
    uint8 bitmaprow = mblock->bitmaprow[comp];
    int col, colmap;

    if (nz_coefs <= 10)
    {
        /* Sparse block: only the first four columns can be populated, and
         * the routines depend on the coefficient count alone. */
        int sel = nz_coefs - 1;

        idctcolVCA[sel][0](coeff_in);
        idctcolVCA[sel][1](coeff_in + 1);
        idctcolVCA[sel][2](coeff_in + 2);
        idctcolVCA[sel][3](coeff_in + 3);

        idctrowVCA_intra[sel](coeff_in, c_comp, width);
        return;
    }

    /* Denser block: walk the columns from 7 down to 0 and skip the ones
     * whose bitmap byte is zero. */
    for (col = 7; col >= 0; col--)
    {
        colmap = (int)bitmapcol[col];
        if (colmap == 0)
        {
            continue;
        }

        if ((colmap & 0xf) != 0)
        {
            /* Lower nibble set: use the generic column routine. */
            idctcol(coeff_in + col);
        }
        else
        {
            /* Only the upper nibble is set: pick a specialised routine. */
            idctcolVCA2[colmap >> 4](coeff_in + col);
        }
    }

    if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) != 0)
    {
        idctrow_intra(coeff_in, c_comp, width);
    }
    else
    {
        /* Columns 4..7 are empty: choose the row routine from bitmaprow. */
        bitmaprow >>= 4;
        idctrowVCA2_intra[(int)bitmaprow](coeff_in, c_comp, width);
    }
#else
    void idct_intra(int *block, uint8 *comp, int width);
    idct_intra(coeff_in, c_comp, width);
#endif
#else
    void idctref_intra(int *block, uint8 *comp, int width);
    idctref_intra(coeff_in, c_comp, width);
#endif

    return;
}
321
322 /* 08/04/05, no residue, just copy from pred to output */
Copy_Blk_to_Vop(uint8 * dst,uint8 * pred,int width)323 void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
324 {
325 /* copy 4 bytes at a time */
326 width -= 4;
327 *((uint32*)dst) = *((uint32*)pred);
328 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
329 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
330 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
331 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
332 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
333 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
334 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
335 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
336 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
337 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
338 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
339 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
340 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
341 *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
342 *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
343
344 return ;
345 }
346
347 /* 08/04/05 compute IDCT and add prediction at the end */
BlockIDCT(uint8 * dst,uint8 * pred,int16 * coeff_in,int width,int nz_coefs,uint8 * bitmapcol,uint8 bitmaprow)348 void BlockIDCT(
349 uint8 *dst, /* destination */
350 uint8 *pred, /* prediction block, pitch 16 */
351 int16 *coeff_in, /* DCT data, size 64 */
352 int width, /* width of dst */
353 int nz_coefs,
354 uint8 *bitmapcol,
355 uint8 bitmaprow
356 )
357 {
358 #ifdef INTEGER_IDCT
359 #ifdef FAST_IDCT /* VCA IDCT using nzcoefs and bitmaps*/
360 int i, bmapr;
361 /*----------------------------------------------------------------------------
362 ; Function body here
363 ----------------------------------------------------------------------------*/
364 if (nz_coefs <= 10)
365 {
366 bmapr = (nz_coefs - 1);
367 (*(idctcolVCA[bmapr]))(coeff_in);
368 (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
369 (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
370 (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
371
372 (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
373 return ;
374 }
375 else
376 {
377 i = 8;
378
379 while (i--)
380 {
381 bmapr = (int)bitmapcol[i];
382 if (bmapr)
383 {
384 if ((bmapr&0xf) == 0) /* 07/18/01 */
385 {
386 (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
387 }
388 else
389 {
390 idctcol(coeff_in + i);
391 }
392 }
393 }
394 if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
395 {
396 (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
397 }
398 else
399 {
400 idctrow(coeff_in, pred, dst, width);
401 }
402 return ;
403 }
404 #else // FAST_IDCT
405 void idct(int *block, uint8 *pred, uint8 *dst, int width);
406 idct(coeff_in, pred, dst, width);
407 return;
408 #endif // FAST_IDCT
409 #else // INTEGER_IDCT
410 void idctref(int *block, uint8 *pred, uint8 *dst, int width);
411 idctref(coeff_in, pred, dst, width);
412 return;
413 #endif // INTEGER_IDCT
414
415 }
416 /*----------------------------------------------------------------------------
417 ; End Function: block_idct
418 ----------------------------------------------------------------------------*/
419
420
421 /****************************************************************************/
422
423 /*
424 ------------------------------------------------------------------------------
425 FUNCTION NAME: idctrow
426 ------------------------------------------------------------------------------
427 INPUT AND OUTPUT DEFINITIONS FOR idctrow
428
429 Inputs:
430 [input_variable_name] = [description of the input to module, its type
431 definition, and length (when applicable)]
432
433 Local Stores/Buffers/Pointers Needed:
434 [local_store_name] = [description of the local store, its type
435 definition, and length (when applicable)]
436 [local_buffer_name] = [description of the local buffer, its type
437 definition, and length (when applicable)]
438 [local_ptr_name] = [description of the local pointer, its type
439 definition, and length (when applicable)]
440
441 Global Stores/Buffers/Pointers Needed:
442 [global_store_name] = [description of the global store, its type
443 definition, and length (when applicable)]
444 [global_buffer_name] = [description of the global buffer, its type
445 definition, and length (when applicable)]
446 [global_ptr_name] = [description of the global pointer, its type
447 definition, and length (when applicable)]
448
449 Outputs:
450 [return_variable_name] = [description of data/pointer returned
451 by module, its type definition, and length
452 (when applicable)]
453
454 Pointers and Buffers Modified:
455 [variable_bfr_ptr] points to the [describe where the
456 variable_bfr_ptr points to, its type definition, and length
457 (when applicable)]
458 [variable_bfr] contents are [describe the new contents of
459 variable_bfr]
460
461 Local Stores Modified:
462 [local_store_name] = [describe new contents, its type
463 definition, and length (when applicable)]
464
465 Global Stores Modified:
466 [global_store_name] = [describe new contents, its type
467 definition, and length (when applicable)]
468
469 ------------------------------------------------------------------------------
470 FUNCTION DESCRIPTION FOR idctrow
471
472 ------------------------------------------------------------------------------
473 REQUIREMENTS FOR idctrow
474
475 ------------------------------------------------------------------------------
476 REFERENCES FOR idctrow
477
478 ------------------------------------------------------------------------------
479 PSEUDO-CODE FOR idctrow
480
481 ------------------------------------------------------------------------------
482 RESOURCES USED FOR idctrow
 When the code is written for a specific target processor, the
 resources used should be documented below.
485
486 STACK USAGE: [stack count for this module] + [variable to represent
487 stack usage for each subroutine called]
488
489 where: [stack usage variable] = stack usage for [subroutine
490 name] (see [filename].ext)
491
492 DATA MEMORY USED: x words
493
494 PROGRAM MEMORY USED: x words
495
496 CLOCK CYCLES: [cycle count equation for this module] + [variable
497 used to represent cycle count for each subroutine
498 called]
499
500 where: [cycle count variable] = cycle count for [subroutine
501 name] (see [filename].ext)
502
503 ------------------------------------------------------------------------------
504 */
505
506 /*----------------------------------------------------------------------------
507 ; Function Code FOR idctrow
508 ----------------------------------------------------------------------------*/
/*
 * idctrow: generic row (horizontal) IDCT pass for a block that has a
 * prediction.
 *
 *   blk   - 64 coefficients, 8 per row; every entry is reset to 0 as it is
 *           consumed, so the block is all zero on return.
 *   pred  - prediction pixels; 8 bytes are read per row, rows 16 bytes apart.
 *   dst   - output pixels; 8 bytes are written per row, rows `width` bytes
 *           apart.
 *   width - output row pitch in bytes.
 *
 * The ADD_AND_CLIPn macros are defined outside this file. pred_word is
 * assigned here but never read explicitly, so the macros presumably pick the
 * n-th byte out of pred_word by name, add it to their argument and clip the
 * result to a pixel value -- confirm in idct.h. Do not rename pred_word
 * without checking them.
 *
 * The attribute below disables the signed-integer-overflow sanitizer check
 * for this function.
 */
__attribute__((no_sanitize("signed-integer-overflow")))
void idctrow(
    int16 *blk, uint8 *pred, uint8 *dst, int width
)
{
    /*----------------------------------------------------------------------------
    ; Define all local variables
    ----------------------------------------------------------------------------*/
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;   /* butterfly work registers */
    int i = 8;                                  /* rows left to process */
    uint32 pred_word, dst_word;                 /* 4 packed pixels in / out */
    int res, res2;

    /*----------------------------------------------------------------------------
    ; Function body here
    ----------------------------------------------------------------------------*/
    /* row (horizontal) IDCT
     *
     *   dst[k] = sum(l = 0..7) c[l] * src[l] * cos( (pi / 8) * (k + 1/2) * l )
     *
     * where: c[0]    = 128
     *        c[1..7] = 128*sqrt(2)
     */

    /* Preset the offsets so that every access inside the loop can use a
     * pre-increment: each pointer starts one step "before" its first row and
     * is advanced just before it is dereferenced.
     * NOTE(review): this forms pointers that lie before the start of the
     * buffers, which is formally outside what C pointer arithmetic allows. */
    width -= 4;     /* dst step from the second word of a row to the next row */
    dst -= width;
    pred -= 12;     /* pred step from the second word of a row to the next row */
    blk -= 8;

    while (i--)
    {
        /* Load coefficients 1..7 of the current row (indices are relative to
         * the previous row's start, hence 8 + n) and clear them. */
        x1 = (int32)blk[12] << 8;
        blk[12] = 0;
        x2 = blk[14];
        blk[14] = 0;
        x3 = blk[10];
        blk[10] = 0;
        x4 = blk[9];
        blk[9] = 0;
        x5 = blk[15];
        blk[15] = 0;
        x6 = blk[13];
        blk[13] = 0;
        x7 = blk[11];
        blk[11] = 0;
        /* Advance to the current row and load coefficient 0; the 8192 is
         * half of 1 << 14, i.e. the rounding term for the final >> 14. */
        x0 = ((*(blk += 8)) << 8) + 8192;
        blk[0] = 0;   /* for proper rounding in the fourth stage */

        /* first stage */
        x8 = W7 * (x4 + x5) + 4;
        x4 = (x8 + (W1 - W7) * x4) >> 3;
        x5 = (x8 - (W1 + W7) * x5) >> 3;
        x8 = W3 * (x6 + x7) + 4;
        x6 = (x8 - (W3 - W5) * x6) >> 3;
        x7 = (x8 - (W3 + W5) * x7) >> 3;

        /* second stage */
        x8 = x0 + x1;
        x0 -= x1;
        x1 = W6 * (x3 + x2) + 4;
        x2 = (x1 - (W2 + W6) * x2) >> 3;
        x3 = (x1 + (W2 - W6) * x3) >> 3;
        x1 = x4 + x6;
        x4 -= x6;
        x6 = x5 + x7;
        x5 -= x7;

        /* third stage */
        x7 = x8 + x3;
        x8 -= x3;
        x3 = x0 + x2;
        x0 -= x2;
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x4 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage: outputs 0..3 of the row, packed lowest byte first */
        pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */

        res = (x7 + x1) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x3 + x2) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = (res2 << 8) | res;
        res = (x0 + x4) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= (res << 16);
        res = (x8 + x6) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= (res << 24);
        *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */

        /* outputs 4..7 of the row, again packed lowest byte first */
        pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */

        res = (x8 - x6) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x0 - x4) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = (res2 << 8) | res;
        res = (x3 - x2) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= (res << 16);
        res = (x7 - x1) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= (res << 24);
        *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    }
    /*----------------------------------------------------------------------------
    ; Return nothing or data or data pointer
    ----------------------------------------------------------------------------*/
    return;
}
620
/*
 * idctrow_intra: generic row (horizontal) IDCT pass that writes pixels
 * directly, with no prediction added.
 *
 *   blk   - 64 coefficients, 8 per row; entries are reset to 0 as they are
 *           consumed.
 *   comp  - output pixels; 8 bytes are written per row, rows `width` bytes
 *           apart.
 *   width - output row pitch.
 *
 * CLIP_RESULT is a statement-like macro defined outside this file (it is
 * used without a trailing semicolon); presumably it clamps its argument to
 * the pixel range -- confirm in idct.h.
 *
 * The attribute below disables the signed-integer-overflow sanitizer check
 * for this function.
 */
__attribute__((no_sanitize("signed-integer-overflow")))
void idctrow_intra(
    int16 *blk, PIXEL *comp, int width
)
{
    /*----------------------------------------------------------------------------
    ; Define all local variables
    ----------------------------------------------------------------------------*/
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp; /* butterfly registers */
    int i = 8;                                      /* rows left to process */
    int offset = width;                             /* output row pitch */
    int32 word;                                     /* 4 packed output pixels */

    /*----------------------------------------------------------------------------
    ; Function body here
    ----------------------------------------------------------------------------*/
    /* row (horizontal) IDCT
     *
     *   dst[k] = sum(l = 0..7) c[l] * src[l] * cos( (pi / 8) * (k + 1/2) * l )
     *
     * where: c[0]    = 128
     *        c[1..7] = 128*sqrt(2)
     */
    while (i--)
    {
        /* Load coefficients 1..7 of the current row and clear them. */
        x1 = (int32)blk[4] << 8;
        blk[4] = 0;
        x2 = blk[6];
        blk[6] = 0;
        x3 = blk[2];
        blk[2] = 0;
        x4 = blk[1];
        blk[1] = 0;
        x5 = blk[7];
        blk[7] = 0;
        x6 = blk[5];
        blk[5] = 0;
        x7 = blk[3];
        blk[3] = 0;
#ifndef FAST_IDCT
        /* shortcut */ /* covered by idctrow1 01/9/2001 */
        /* NOTE(review): this branch stores into blk rather than comp and
         * returns from the whole function, so the remaining rows are never
         * processed. It is compiled only without FAST_IDCT; confirm whether
         * it is still intended. */
        if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
        {
            blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
            return;
        }
#endif
        /* Coefficient 0; the 8192 is half of 1 << 14, i.e. the rounding
         * term for the final >> 14. */
        x0 = ((int32)blk[0] << 8) + 8192;
        blk[0] = 0;   /* for proper rounding in the fourth stage */

        /* first stage */
        x8 = W7 * (x4 + x5) + 4;
        x4 = (x8 + (W1 - W7) * x4) >> 3;
        x5 = (x8 - (W1 + W7) * x5) >> 3;
        x8 = W3 * (x6 + x7) + 4;
        x6 = (x8 - (W3 - W5) * x6) >> 3;
        x7 = (x8 - (W3 + W5) * x7) >> 3;

        /* second stage */
        x8 = x0 + x1;
        x0 -= x1;
        x1 = W6 * (x3 + x2) + 4;
        x2 = (x1 - (W2 + W6) * x2) >> 3;
        x3 = (x1 + (W2 - W6) * x3) >> 3;
        x1 = x4 + x6;
        x4 -= x6;
        x6 = x5 + x7;
        x5 -= x7;

        /* third stage */
        x7 = x8 + x3;
        x8 -= x3;
        x3 = x0 + x2;
        x0 -= x2;
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x4 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage: outputs 0..3 of the row, packed lowest byte first
         * and stored as one 32-bit word */
        word = ((x7 + x1) >> 14);
        CLIP_RESULT(word)

        temp = ((x3 + x2) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 8);

        temp = ((x0 + x4) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 16);

        temp = ((x8 + x6) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 24);
        *((int32*)(comp)) = word;

        /* outputs 4..7 of the row, stored 4 bytes further on */
        word = ((x8 - x6) >> 14);
        CLIP_RESULT(word)

        temp = ((x0 - x4) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 8);

        temp = ((x3 - x2) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 16);

        temp = ((x7 - x1) >> 14);
        CLIP_RESULT(temp)
        word = word | (temp << 24);
        *((int32*)(comp + 4)) = word;
        comp += offset;     /* next output row */

        blk += B_SIZE;      /* next coefficient row */
    }
    /*----------------------------------------------------------------------------
    ; Return nothing or data or data pointer
    ----------------------------------------------------------------------------*/
    return;
}
738
739 /*----------------------------------------------------------------------------
740 ; End Function: idctrow
741 ----------------------------------------------------------------------------*/
742
743
744 /****************************************************************************/
745
746 /*
747 ------------------------------------------------------------------------------
748 FUNCTION NAME: idctcol
749 ------------------------------------------------------------------------------
750 INPUT AND OUTPUT DEFINITIONS FOR idctcol
751
752 Inputs:
753 [input_variable_name] = [description of the input to module, its type
754 definition, and length (when applicable)]
755
756 Local Stores/Buffers/Pointers Needed:
757 [local_store_name] = [description of the local store, its type
758 definition, and length (when applicable)]
759 [local_buffer_name] = [description of the local buffer, its type
760 definition, and length (when applicable)]
761 [local_ptr_name] = [description of the local pointer, its type
762 definition, and length (when applicable)]
763
764 Global Stores/Buffers/Pointers Needed:
765 [global_store_name] = [description of the global store, its type
766 definition, and length (when applicable)]
767 [global_buffer_name] = [description of the global buffer, its type
768 definition, and length (when applicable)]
769 [global_ptr_name] = [description of the global pointer, its type
770 definition, and length (when applicable)]
771
772 Outputs:
773 [return_variable_name] = [description of data/pointer returned
774 by module, its type definition, and length
775 (when applicable)]
776
777 Pointers and Buffers Modified:
778 [variable_bfr_ptr] points to the [describe where the
779 variable_bfr_ptr points to, its type definition, and length
780 (when applicable)]
781 [variable_bfr] contents are [describe the new contents of
782 variable_bfr]
783
784 Local Stores Modified:
785 [local_store_name] = [describe new contents, its type
786 definition, and length (when applicable)]
787
788 Global Stores Modified:
789 [global_store_name] = [describe new contents, its type
790 definition, and length (when applicable)]
791
792 ------------------------------------------------------------------------------
793 FUNCTION DESCRIPTION FOR idctcol
794
795 ------------------------------------------------------------------------------
796 REQUIREMENTS FOR idctcol
797
798 ------------------------------------------------------------------------------
799 REFERENCES FOR idctcol
800
801 ------------------------------------------------------------------------------
802 PSEUDO-CODE FOR idctcol
803
804 ------------------------------------------------------------------------------
805 RESOURCES USED FOR idctcol
 When the code is written for a specific target processor, the
 resources used should be documented below.
808
809 STACK USAGE: [stack count for this module] + [variable to represent
810 stack usage for each subroutine called]
811
812 where: [stack usage variable] = stack usage for [subroutine
813 name] (see [filename].ext)
814
815 DATA MEMORY USED: x words
816
817 PROGRAM MEMORY USED: x words
818
819 CLOCK CYCLES: [cycle count equation for this module] + [variable
820 used to represent cycle count for each subroutine
821 called]
822
823 where: [cycle count variable] = cycle count for [subroutine
824 name] (see [filename].ext)
825
826 ------------------------------------------------------------------------------
827 */
828
829 /*----------------------------------------------------------------------------
830 ; Function Code FOR idctcol
831 ----------------------------------------------------------------------------*/
832 __attribute__((no_sanitize("signed-integer-overflow")))
idctcol(int16 * blk)833 void idctcol(
834 int16 *blk
835 )
836 {
837 /*----------------------------------------------------------------------------
838 ; Define all local variables
839 ----------------------------------------------------------------------------*/
840 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
841
842 /*----------------------------------------------------------------------------
843 ; Function body here
844 ----------------------------------------------------------------------------*/
845 /* column (vertical) IDCT
846 *
847 * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] *
848 * cos( -- * ( k + - ) * l ) l=0 8 2
849 *
850 * where: c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
851 x1 = (int32)blk[32] << 11;
852 x2 = blk[48];
853 x3 = blk[16];
854 x4 = blk[8];
855 x5 = blk[56];
856 x6 = blk[40];
857 x7 = blk[24];
858 #ifndef FAST_IDCT
859 /* shortcut */ /* covered by idctcolumn1 01/9/2001 */
860 if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
861 {
862 blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
863 = blk[0] << 3;
864 return;
865 }
866 #endif
867
868 x0 = ((int32)blk[0] << 11) + 128;
869
870 /* first stage */
871 x8 = W7 * (x4 + x5);
872 x4 = x8 + (W1 - W7) * x4;
873 x5 = x8 - (W1 + W7) * x5;
874 x8 = W3 * (x6 + x7);
875 x6 = x8 - (W3 - W5) * x6;
876 x7 = x8 - (W3 + W5) * x7;
877
878 /* second stage */
879 x8 = x0 + x1;
880 x0 -= x1;
881 x1 = W6 * (x3 + x2);
882 x2 = x1 - (W2 + W6) * x2;
883 x3 = x1 + (W2 - W6) * x3;
884 x1 = x4 + x6;
885 x4 -= x6;
886 x6 = x5 + x7;
887 x5 -= x7;
888
889 /* third stage */
890 x7 = x8 + x3;
891 x8 -= x3;
892 x3 = x0 + x2;
893 x0 -= x2;
894 x2 = (181 * (x4 + x5) + 128) >> 8;
895 x4 = (181 * (x4 - x5) + 128) >> 8;
896
897 /* fourth stage */
898 blk[0] = (x7 + x1) >> 8;
899 blk[8] = (x3 + x2) >> 8;
900 blk[16] = (x0 + x4) >> 8;
901 blk[24] = (x8 + x6) >> 8;
902 blk[32] = (x8 - x6) >> 8;
903 blk[40] = (x0 - x4) >> 8;
904 blk[48] = (x3 - x2) >> 8;
905 blk[56] = (x7 - x1) >> 8;
906 /*----------------------------------------------------------------------------
907 ; Return nothing or data or data pointer
908 ----------------------------------------------------------------------------*/
909 return;
910 }
911 /*----------------------------------------------------------------------------
912 ; End Function: idctcol
913 ----------------------------------------------------------------------------*/
914
915