• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "mp4def.h"
19 #include "idct.h"
20 #include "motion_comp.h"
21 
22 #ifdef FAST_IDCT
23 
/****************************************************************
*       vca_idct.c : created 6/1/99. Several hard-coded, reduced
*                     IDCT functions (selected using nz_coefs).
******************************************************************/
28 
29 /*****************************************************/
30 //pretested version
/*
 * idctrow0: row-pass stub that performs no work.
 *
 * It shares the (blk, pred, dst, width) signature of the other idctrowN
 * routines in this file, but none of the arguments is read or written;
 * each one is only marked as unused.
 */
void idctrow0(int16 *blk, uint8 *pred, uint8 *dst, int width)
{
    /* Intentionally empty. */
    OSCL_UNUSED_ARG(pred);
    OSCL_UNUSED_ARG(dst);
    OSCL_UNUSED_ARG(width);
    OSCL_UNUSED_ARG(blk);
}
/*
 * idctcol0: column-pass stub that performs no work.
 *
 * blk is neither read nor modified; it is only marked as unused.
 */
void idctcol0(int16 *blk)
{
    /* Intentionally empty. */
    OSCL_UNUSED_ARG(blk);
}
44 
/*
 * idctrow1: row pass that uses only the first coefficient of each row,
 * adds the result to the prediction and writes the clipped pixels.
 *
 *   blk   - coefficients, 8 rows with a stride of 8 entries; entry 0 of
 *           every row is read and then reset to 0.
 *   pred  - prediction bytes, 8 per row, with a row stride of 16 bytes.
 *   dst   - output bytes, 8 per row, with a row stride of `width` bytes.
 *   width - row stride of dst.
 *
 * Every pixel of a row receives the same residual, (blk[0] + 32) >> 6.
 *
 * The pixels are handled one byte at a time.  This yields the same bytes as
 * packing four results into a 32-bit word, but it does not depend on the
 * alignment of pred/dst, does not access uint8 storage through a uint32
 * pointer, and never shifts a signed value into the sign bit.  No pointer
 * is formed outside the 8 rows being processed.
 */
void idctrow1(int16 *blk, uint8 *pred, uint8 *dst, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        int16 *coef = blk + row * 8;
        const uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;
        int dc;
        int col;

        /* +32 rounds the division by 64 */
        dc = (coef[0] + 32) >> 6;
        coef[0] = 0;

        for (col = 0; col < 8; col++)
        {
            int res = dc + pred_row[col];

            /* CLIP_RESULT is defined outside this file; it is expected to
               leave res in 0..255 so that it fits one output byte. */
            CLIP_RESULT(res);
            dst_row[col] = (uint8)res;
        }
    }
}
94 
/*
 * idctcol1: column pass that uses only the first coefficient of a column.
 *
 *   blk - top of the column; entries blk[0], blk[8], ..., blk[56] are all
 *         overwritten with blk[0] scaled by 8.
 *
 * The scaling is written as a multiplication because blk[0] may be
 * negative, and left-shifting a negative value is undefined in C.
 */
void idctcol1(int16 *blk)
{
    const int16 dc = (int16)(blk[0] * 8);
    int row;

    for (row = 0; row < 8; row++)
    {
        blk[row * 8] = dc;
    }
}
101 
/*
 * idctrow2: row pass that uses only the first two coefficients of each row,
 * adds the result to the prediction and writes the clipped pixels.
 *
 *   blk   - coefficients, 8 rows with a stride of 8 entries; entries 0 and 1
 *           of every row are read and then reset to 0.
 *   pred  - prediction bytes, 8 per row, with a row stride of 16 bytes.
 *   dst   - output bytes, 8 per row, with a row stride of `width` bytes.
 *   width - row stride of dst.
 *
 * pred and dst are accessed as two 32-bit words per row, so both must be
 * suitably aligned for uint32 access.
 *
 * NOTE(review): ADD_AND_CLIP1..4 are defined outside this file.  They
 * presumably add byte 0..3 of the local `pred_word` to their argument and
 * clip it to 0..255 - confirm against the macro definitions.  `pred_word`
 * must therefore keep its name and be loaded before they are used.
 */
void idctrow2(int16 *blk, uint8 *pred, uint8 *dst, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Row pointers are derived from the row index so that no pointer
           outside the 8 processed rows is ever formed. */
        int16 *coef = blk + row * 8;
        uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;
        int32 x0, x1, x2, x4, x5;
        uint32 pred_word, dst_word;
        int res, res2;

        /* shortcut */
        x4 = coef[1];
        coef[1] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 scale used by the fourth stage,
           so those shifts round to nearest. */
        x0 = (int32)coef[0] * 256 + 8192;
        coef[0] = 0;

        /* first stage */
        x5 = (W7 * x4 + 4) >> 3;
        x4 = (W1 * x4 + 4) >> 3;

        /* third stage */
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x1 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage, first four pixels */
        pred_word = *((uint32*)pred_row); /* read 4 bytes from pred */
        res = (x0 + x4) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x0 + x2) >> 14;
        ADD_AND_CLIP2(res2);
        /* Pack in unsigned arithmetic: a clipped value of 128 or more
           shifted by 24 does not fit a signed int. */
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x0 + x1) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x0 + x5) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)dst_row) = dst_word; /* save 4 bytes to dst */

        /* fourth stage, last four pixels */
        pred_word = *((uint32*)(pred_row + 4)); /* read 4 bytes from pred */
        res = (x0 - x5) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x0 - x1) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x0 - x2) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x0 - x4) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)(dst_row + 4)) = dst_word; /* save 4 bytes to dst */
    }
}
162 
/*
 * idctcol2: column pass that uses only the first two coefficients of a
 * column.
 *
 *   blk - top of the column; blk[0] and blk[8] are read, then all eight
 *         entries blk[0], blk[8], ..., blk[56] are overwritten.
 *
 * The DC term is scaled by 2048 and the outputs are shifted right by 8, so
 * a column holding only blk[0] comes out scaled by 8.
 */
void idctcol2(int16 *blk)
{
    int32 x0, x1, x3, x5, x7;

    x1 = blk[8];
    /* Multiply rather than shift: blk[0] may be negative.  128 is one half
       in the >> 8 output scale, so the final shifts round to nearest. */
    x0 = (int32)blk[0] * 2048 + 128;

    /* odd part, derived from the single odd coefficient */
    x7 = W7 * x1;
    x1 = W1 * x1;

    x3 = x7;
    x5 = (181 * (x1 - x7) + 128) >> 8;
    x7 = (181 * (x1 + x7) + 128) >> 8;

    /* upper half of the column */
    blk[0] = (x0 + x1) >> 8;
    blk[8] = (x0 + x7) >> 8;
    blk[16] = (x0 + x5) >> 8;
    blk[24] = (x0 + x3) >> 8;
    /* lower half mirrors the upper half with the odd part subtracted */
    blk[32] = (x0 - x3) >> 8;
    blk[40] = (x0 - x5) >> 8;
    blk[48] = (x0 - x7) >> 8;
    blk[56] = (x0 - x1) >> 8;
}
189 
/*
 * idctrow3: row pass that uses only the first three coefficients of each
 * row, adds the result to the prediction and writes the clipped pixels.
 *
 *   blk   - coefficients, 8 rows with a stride of 8 entries; entries 0..2
 *           of every row are read and then reset to 0.
 *   pred  - prediction bytes, 8 per row, with a row stride of 16 bytes.
 *   dst   - output bytes, 8 per row, with a row stride of `width` bytes.
 *   width - row stride of dst.
 *
 * pred and dst are accessed as two 32-bit words per row, so both must be
 * suitably aligned for uint32 access.
 *
 * NOTE(review): ADD_AND_CLIP1..4 are defined outside this file.  They
 * presumably add byte 0..3 of the local `pred_word` to their argument and
 * clip it to 0..255 - confirm against the macro definitions.  `pred_word`
 * must therefore keep its name and be loaded before they are used.
 */
void idctrow3(int16 *blk, uint8 *pred, uint8 *dst, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Row pointers are derived from the row index so that no pointer
           outside the 8 processed rows is ever formed. */
        int16 *coef = blk + row * 8;
        uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;
        int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
        uint32 pred_word, dst_word;
        int res, res2;

        x2 = coef[2];
        coef[2] = 0;
        x1 = coef[1];
        coef[1] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 output scale (rounding). */
        x0 = (int32)coef[0] * 256 + 8192;
        coef[0] = 0;

        /* even part: combine the DC term with coefficient 2 */
        x4 = x0;
        x6 = (W6 * x2 + 4) >> 3;
        x2 = (W2 * x2 + 4) >> 3;
        x8 = x0 - x2;
        x0 += x2;
        x2 = x8;
        x8 = x4 - x6;
        x4 += x6;
        x6 = x8;

        /* odd part: derived from coefficient 1 only */
        x7 = (W7 * x1 + 4) >> 3;
        x1 = (W1 * x1 + 4) >> 3;
        x3 = x7;
        x5 = (181 * (x1 - x7) + 128) >> 8;
        x7 = (181 * (x1 + x7) + 128) >> 8;

        /* first four pixels */
        pred_word = *((uint32*)pred_row); /* read 4 bytes from pred */
        res = (x0 + x1) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x4 + x7) >> 14;
        ADD_AND_CLIP2(res2);
        /* Pack in unsigned arithmetic: a clipped value of 128 or more
           shifted by 24 does not fit a signed int. */
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x6 + x5) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x2 + x3) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)dst_row) = dst_word; /* save 4 bytes to dst */

        /* last four pixels */
        pred_word = *((uint32*)(pred_row + 4)); /* read 4 bytes from pred */
        res = (x2 - x3) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x6 - x5) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x4 - x7) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x0 - x1) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)(dst_row + 4)) = dst_word; /* save 4 bytes to dst */
    }
}
261 
/*
 * idctcol3: column pass that uses only the first three coefficients of a
 * column.
 *
 *   blk - top of the column; blk[0], blk[8] and blk[16] are read, then all
 *         eight entries blk[0], blk[8], ..., blk[56] are overwritten.
 *
 * The DC term is scaled by 2048 and the outputs are shifted right by 8.
 */
void idctcol3(int16 *blk)
{
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;

    x2 = blk[16];
    x1 = blk[8];
    /* Multiply rather than shift: blk[0] may be negative.  128 is one half
       in the >> 8 output scale, so the final shifts round to nearest. */
    x0 = (int32)blk[0] * 2048 + 128;

    /* even part: combine the DC term with coefficient 2 */
    x4 = x0;
    x6 = W6 * x2;
    x2 = W2 * x2;
    x8 = x0 - x2;
    x0 += x2;
    x2 = x8;
    x8 = x4 - x6;
    x4 += x6;
    x6 = x8;

    /* odd part: derived from coefficient 1 only */
    x7 = W7 * x1;
    x1 = W1 * x1;
    x3 = x7;
    x5 = (181 * (x1 - x7) + 128) >> 8;
    x7 = (181 * (x1 + x7) + 128) >> 8;

    /* upper half of the column */
    blk[0] = (x0 + x1) >> 8;
    blk[8] = (x4 + x7) >> 8;
    blk[16] = (x6 + x5) >> 8;
    blk[24] = (x2 + x3) >> 8;
    /* lower half mirrors the upper half with the odd part subtracted */
    blk[32] = (x2 - x3) >> 8;
    blk[40] = (x6 - x5) >> 8;
    blk[48] = (x4 - x7) >> 8;
    blk[56] = (x0 - x1) >> 8;
}
297 
298 
/*
 * idctrow4: row pass that uses only the first four coefficients of each
 * row, adds the result to the prediction and writes the clipped pixels.
 *
 *   blk   - coefficients, 8 rows with a stride of 8 entries; entries 0..3
 *           of every row are read and then reset to 0.
 *   pred  - prediction bytes, 8 per row, with a row stride of 16 bytes.
 *   dst   - output bytes, 8 per row, with a row stride of `width` bytes.
 *   width - row stride of dst.
 *
 * pred and dst are accessed as two 32-bit words per row, so both must be
 * suitably aligned for uint32 access.
 *
 * NOTE(review): ADD_AND_CLIP1..4 are defined outside this file.  They
 * presumably add byte 0..3 of the local `pred_word` to their argument and
 * clip it to 0..255 - confirm against the macro definitions.  `pred_word`
 * must therefore keep its name and be loaded before they are used.
 */
void idctrow4(int16 *blk, uint8 *pred, uint8 *dst, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Row pointers are derived from the row index so that no pointer
           outside the 8 processed rows is ever formed. */
        int16 *coef = blk + row * 8;
        uint8 *pred_row = pred + row * 16;
        uint8 *dst_row = dst + row * width;
        int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
        uint32 pred_word, dst_word;
        int res, res2;

        x2 = coef[2];
        coef[2] = 0;
        x1 = coef[1];
        coef[1] = 0;
        x3 = coef[3];
        coef[3] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 output scale (rounding). */
        x0 = (int32)coef[0] * 256 + 8192;
        coef[0] = 0;

        /* even part: combine the DC term with coefficient 2 */
        x4 = x0;
        x6 = (W6 * x2 + 4) >> 3;
        x2 = (W2 * x2 + 4) >> 3;
        x8 = x0 - x2;
        x0 += x2;
        x2 = x8;
        x8 = x4 - x6;
        x4 += x6;
        x6 = x8;

        /* odd part: combine coefficients 1 and 3 */
        x7 = (W7 * x1 + 4) >> 3;
        x1 = (W1 * x1 + 4) >> 3;
        x5 = (W3 * x3 + 4) >> 3;
        x3 = (- W5 * x3 + 4) >> 3;
        x8 = x1 - x5;
        x1 += x5;
        x5 = x8;
        x8 = x7 - x3;
        x3 += x7;
        x7 = (181 * (x5 + x8) + 128) >> 8;
        x5 = (181 * (x5 - x8) + 128) >> 8;

        /* first four pixels */
        pred_word = *((uint32*)pred_row); /* read 4 bytes from pred */
        res = (x0 + x1) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x4 + x7) >> 14;
        ADD_AND_CLIP2(res2);
        /* Pack in unsigned arithmetic: a clipped value of 128 or more
           shifted by 24 does not fit a signed int. */
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x6 + x5) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x2 + x3) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)dst_row) = dst_word; /* save 4 bytes to dst */

        /* last four pixels */
        pred_word = *((uint32*)(pred_row + 4)); /* read 4 bytes from pred */
        res = (x2 - x3) >> 14;
        ADD_AND_CLIP1(res);
        res2 = (x6 - x5) >> 14;
        ADD_AND_CLIP2(res2);
        dst_word = ((uint32)res2 << 8) | (uint32)res;
        res = (x4 - x7) >> 14;
        ADD_AND_CLIP3(res);
        dst_word |= ((uint32)res << 16);
        res = (x0 - x1) >> 14;
        ADD_AND_CLIP4(res);
        dst_word |= ((uint32)res << 24);
        *((uint32*)(dst_row + 4)) = dst_word; /* save 4 bytes to dst */
    }
}
375 
/*
 * idctcol4: column pass that uses only the first four coefficients of a
 * column.
 *
 *   blk - top of the column; blk[0], blk[8], blk[16] and blk[24] are read,
 *         then all eight entries blk[0], blk[8], ..., blk[56] are
 *         overwritten.
 *
 * The DC term is scaled by 2048 and the outputs are shifted right by 8.
 */
void idctcol4(int16 *blk)
{
    int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;

    x2 = blk[16];
    x1 = blk[8];
    x3 = blk[24];
    /* Multiply rather than shift: blk[0] may be negative.  128 is one half
       in the >> 8 output scale, so the final shifts round to nearest. */
    x0 = (int32)blk[0] * 2048 + 128;

    /* even part: combine the DC term with coefficient 2 */
    x4 = x0;
    x6 = W6 * x2;
    x2 = W2 * x2;
    x8 = x0 - x2;
    x0 += x2;
    x2 = x8;
    x8 = x4 - x6;
    x4 += x6;
    x6 = x8;

    /* odd part: combine coefficients 1 and 3 */
    x7 = W7 * x1;
    x1 = W1 * x1;
    x5 = W3 * x3;
    x3 = -W5 * x3;
    x8 = x1 - x5;
    x1 += x5;
    x5 = x8;
    x8 = x7 - x3;
    x3 += x7;
    x7 = (181 * (x5 + x8) + 128) >> 8;
    x5 = (181 * (x5 - x8) + 128) >> 8;

    /* upper half of the column */
    blk[0] = (x0 + x1) >> 8;
    blk[8] = (x4 + x7) >> 8;
    blk[16] = (x6 + x5) >> 8;
    blk[24] = (x2 + x3) >> 8;
    /* lower half mirrors the upper half with the odd part subtracted */
    blk[32] = (x2 - x3) >> 8;
    blk[40] = (x6 - x5) >> 8;
    blk[48] = (x4 - x7) >> 8;
    blk[56] = (x0 - x1) >> 8;
}
418 
/*
 * idctrow0_intra: intra row-pass stub that performs no work.
 *
 * It shares the (blk, comp, width) signature of the other idctrowN_intra
 * routines in this file, but none of the arguments is read or written;
 * each one is only marked as unused.
 */
void idctrow0_intra(int16 *blk, PIXEL * comp, int width)
{
    /* Intentionally empty. */
    OSCL_UNUSED_ARG(width);
    OSCL_UNUSED_ARG(comp);
    OSCL_UNUSED_ARG(blk);
}
426 
/*
 * idctrow1_intra: intra row pass that uses only the first coefficient of
 * each row and writes the clipped result straight to the output.
 *
 *   blk   - coefficients, 8 rows with a stride of B_SIZE entries; entry 0
 *           of every row is read and then reset to 0.
 *   comp  - output; two 32-bit words are stored per row, at comp and
 *           comp + 4, so comp must be suitably aligned for uint32 access.
 *   width - distance between consecutive output rows, in PIXEL elements.
 *
 * Each row is filled with one value: (blk[0] + 32) >> 6 after CLIP_RESULT,
 * replicated into all four bytes of both words.
 */
void idctrow1_intra(int16 *blk, PIXEL *comp, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        PIXEL *out = comp + row * width;
        int32 level;
        uint32 quad;

        /* +32 rounds the division by 64 */
        level = (blk[0] + 32) >> 6;
        blk[0] = 0;
        CLIP_RESULT(level);

        /* replicate the byte into all four lanes of the word */
        quad = (level << 8) | level;
        quad |= quad << 16;

        *((uint32*)out) = quad;
        *((uint32*)(out + 4)) = quad;

        blk += B_SIZE;
    }
}
455 
/*
 * idctrow2_intra: intra row pass that uses only the first two coefficients
 * of each row and writes the clipped results straight to the output.
 *
 *   blk   - coefficients, 8 rows with a stride of B_SIZE entries; entries
 *           0 and 1 of every row are read and then reset to 0.
 *   comp  - output; two 32-bit words are stored per row, at comp and
 *           comp + 4, so comp must be suitably aligned for 32-bit access.
 *   width - distance between consecutive output rows, in PIXEL elements.
 *
 * CLIP_RESULT is defined outside this file; it is expected to leave its
 * argument in 0..255 so that each result occupies one byte of the word.
 */
void idctrow2_intra(int16 *blk, PIXEL *comp, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Derived from the row index so that no pointer before comp is
           ever formed. */
        PIXEL *out = comp + row * width;
        int32 x0, x1, x2, x4, x5, temp;
        uint32 word;

        /* shortcut */
        x4 = blk[1];
        blk[1] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 scale used by the fourth stage,
           so those shifts round to nearest. */
        x0 = (int32)blk[0] * 256 + 8192;
        blk[0] = 0;

        /* first stage */
        x5 = (W7 * x4 + 4) >> 3;
        x4 = (W1 * x4 + 4) >> 3;

        /* third stage */
        x2 = (181 * (x4 + x5) + 128) >> 8;
        x1 = (181 * (x4 - x5) + 128) >> 8;

        /* fourth stage, first four pixels; packed in unsigned arithmetic
           because a clipped value of 128 or more shifted by 24 does not
           fit a signed 32-bit integer */
        temp = (x0 + x4) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x0 + x2) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x0 + x1) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x0 + x5) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)out) = word;

        /* fourth stage, last four pixels */
        temp = (x0 - x5) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x0 - x1) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x0 - x2) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x0 - x4) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)(out + 4)) = word;

        blk += B_SIZE;
    }
}
512 
/*
 * idctrow3_intra: intra row pass that uses only the first three
 * coefficients of each row and writes the clipped results straight to the
 * output.
 *
 *   blk   - coefficients, 8 rows with a stride of B_SIZE entries; entries
 *           0..2 of every row are read and then reset to 0.
 *   comp  - output; two 32-bit words are stored per row, at comp and
 *           comp + 4, so comp must be suitably aligned for 32-bit access.
 *   width - distance between consecutive output rows, in PIXEL elements.
 *
 * CLIP_RESULT is defined outside this file; it is expected to leave its
 * argument in 0..255 so that each result occupies one byte of the word.
 */
void idctrow3_intra(int16 *blk, PIXEL *comp, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Derived from the row index so that no pointer before comp is
           ever formed. */
        PIXEL *out = comp + row * width;
        int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
        uint32 word;

        x2 = blk[2];
        blk[2] = 0;
        x1 = blk[1];
        blk[1] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 output scale (rounding). */
        x0 = (int32)blk[0] * 256 + 8192;
        blk[0] = 0;

        /* even part: combine the DC term with coefficient 2 */
        x4 = x0;
        x6 = (W6 * x2 + 4) >> 3;
        x2 = (W2 * x2 + 4) >> 3;
        x8 = x0 - x2;
        x0 += x2;
        x2 = x8;
        x8 = x4 - x6;
        x4 += x6;
        x6 = x8;

        /* odd part: derived from coefficient 1 only */
        x7 = (W7 * x1 + 4) >> 3;
        x1 = (W1 * x1 + 4) >> 3;
        x3 = x7;
        x5 = (181 * (x1 - x7) + 128) >> 8;
        x7 = (181 * (x1 + x7) + 128) >> 8;

        /* first four pixels; packed in unsigned arithmetic because a
           clipped value of 128 or more shifted by 24 does not fit a signed
           32-bit integer */
        temp = (x0 + x1) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x4 + x7) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x6 + x5) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x2 + x3) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)out) = word;

        /* last four pixels */
        temp = (x2 - x3) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x6 - x5) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x4 - x7) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x0 - x1) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)(out + 4)) = word;

        blk += B_SIZE;
    }
}
585 
/*
 * idctrow4_intra: intra row pass that uses only the first four
 * coefficients of each row and writes the clipped results straight to the
 * output.
 *
 *   blk   - coefficients, 8 rows with a stride of B_SIZE entries; entries
 *           0..3 of every row are read and then reset to 0.
 *   comp  - output; two 32-bit words are stored per row, at comp and
 *           comp + 4, so comp must be suitably aligned for 32-bit access.
 *   width - distance between consecutive output rows, in PIXEL elements.
 *
 * CLIP_RESULT is defined outside this file; it is expected to leave its
 * argument in 0..255 so that each result occupies one byte of the word.
 */
void idctrow4_intra(int16 *blk, PIXEL *comp, int width)
{
    int row;

    for (row = 0; row < 8; row++)
    {
        /* Derived from the row index so that no pointer before comp is
           ever formed. */
        PIXEL *out = comp + row * width;
        int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
        uint32 word;

        x2 = blk[2];
        blk[2] = 0;
        x1 = blk[1];
        blk[1] = 0;
        x3 = blk[3];
        blk[3] = 0;
        /* Multiply rather than shift: the coefficient may be negative.
           8192 is one half in the >> 14 output scale (rounding). */
        x0 = (int32)blk[0] * 256 + 8192;
        blk[0] = 0;

        /* even part: combine the DC term with coefficient 2 */
        x4 = x0;
        x6 = (W6 * x2 + 4) >> 3;
        x2 = (W2 * x2 + 4) >> 3;
        x8 = x0 - x2;
        x0 += x2;
        x2 = x8;
        x8 = x4 - x6;
        x4 += x6;
        x6 = x8;

        /* odd part: combine coefficients 1 and 3 */
        x7 = (W7 * x1 + 4) >> 3;
        x1 = (W1 * x1 + 4) >> 3;
        x5 = (W3 * x3 + 4) >> 3;
        x3 = (- W5 * x3 + 4) >> 3;
        x8 = x1 - x5;
        x1 += x5;
        x5 = x8;
        x8 = x7 - x3;
        x3 += x7;
        x7 = (181 * (x5 + x8) + 128) >> 8;
        x5 = (181 * (x5 - x8) + 128) >> 8;

        /* first four pixels; packed in unsigned arithmetic because a
           clipped value of 128 or more shifted by 24 does not fit a signed
           32-bit integer */
        temp = (x0 + x1) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x4 + x7) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x6 + x5) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x2 + x3) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)out) = word;

        /* last four pixels */
        temp = (x2 - x3) >> 14;
        CLIP_RESULT(temp);
        word = (uint32)temp;
        temp = (x6 - x5) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 8);
        temp = (x4 - x7) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 16);
        temp = (x0 - x1) >> 14;
        CLIP_RESULT(temp);
        word |= ((uint32)temp << 24);
        *((uint32*)(out + 4)) = word;

        blk += B_SIZE;
    }
}
666 
667 #endif
668 
669