• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "oscl_base_macros.h" // for OSCL_UNUSED_ARG
19 #include "mp4enc_lib.h"
20 #include "mp4lib_int.h"
21 #include "dct_inline.h"
22 
23 #define FDCT_SHIFT 10
24 
25 #ifdef __cplusplus
26 extern "C"
27 {
28 #endif
29 
30     /**************************************************************************/
31     /*  Function:   BlockDCT_AANwSub
32         Date:       7/31/01
33         Input:
34         Output:     out[64] ==> next block
35         Purpose:    Do subtraction for zero MV first
36         Modified:
37     **************************************************************************/
38 
BlockDCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)39     Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
40     {
41         Short *dst;
42         Int k0, k1, k2, k3, k4, k5, k6, k7;
43         Int round;
44         Int k12 = 0x022A02D4;
45         Int k14 = 0x0188053A;
46         Int abs_sum;
47         Int mask;
48         Int tmp, tmp2;
49         Int ColTh;
50 
51         dst = out + 64 ;
52         ColTh = *dst;
53         out += 128;
54         round = 1 << (FDCT_SHIFT - 1);
55 
56         do  /* fdct_nextrow */
57         {
58             /* assuming the block is word-aligned */
59             mask = 0x1FE;
60             tmp = *((Int*) cur);    /* contains 4 pixels */
61             tmp2 = *((Int*) pred); /* prediction 4 pixels */
62             k0 = tmp2 & 0xFF;
63             k1 = mask & (tmp << 1);
64             k0 = k1 - (k0 << 1);
65             k1 = (tmp2 >> 8) & 0xFF;
66             k2 = mask & (tmp >> 7);
67             k1 = k2 - (k1 << 1);
68             k2 = (tmp2 >> 16) & 0xFF;
69             k3 = mask & (tmp >> 15);
70             k2 = k3 - (k2 << 1);
71             k3 = (tmp2 >> 24) & 0xFF;
72             k4 = mask & (tmp >> 23);
73             k3 = k4 - (k3 << 1);
74             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
75             tmp2 = *((Int*)(pred + 4));
76             k4 = tmp2 & 0xFF;
77             k5 = mask & (tmp << 1);
78             k4 = k5 - (k4 << 1);
79             k5 = (tmp2 >> 8) & 0xFF;
80             k6 = mask & (tmp >> 7);
81             k5 = k6 - (k5 << 1);
82             k6 = (tmp2 >> 16) & 0xFF;
83             k7 = mask & (tmp >> 15);
84             k6 = k7 - (k6 << 1);
85             k7 = (tmp2 >> 24) & 0xFF;
86             tmp = mask & (tmp >> 23);
87             k7 = tmp - (k7 << 1);
88             cur += width;
89             pred += 16;
90 
91             /* fdct_1 */
92             k0 = k0 + k7;
93             k7 = k0 - (k7 << 1);
94             k1 = k1 + k6;
95             k6 = k1 - (k6 << 1);
96             k2 = k2 + k5;
97             k5 = k2 - (k5 << 1);
98             k3 = k3 + k4;
99             k4 = k3 - (k4 << 1);
100 
101             k0 = k0 + k3;
102             k3 = k0 - (k3 << 1);
103             k1 = k1 + k2;
104             k2 = k1 - (k2 << 1);
105 
106             k0 = k0 + k1;
107             k1 = k0 - (k1 << 1);
108             /**********/
109             dst[0] = k0;
110             dst[4] = k1; /* col. 4 */
111             /* fdct_2 */
112             k4 = k4 + k5;
113             k5 = k5 + k6;
114             k6 = k6 + k7;
115             k2 = k2 + k3;
116             /* MUL2C k2,k5,724,FDCT_SHIFT */
117             /* k0, k1 become scratch */
118             /* assume FAST MULTIPLY */
119             k1 = mla724(k12, k5, round);
120             k0 = mla724(k12, k2, round);
121 
122             k5 = k1 >> FDCT_SHIFT;
123             k2 = k0 >> FDCT_SHIFT;
124             /*****************/
125             k2 = k2 + k3;
126             k3 = (k3 << 1) - k2;
127             /********/
128             dst[2] = k2;        /* col. 2 */
129             k3 <<= 1;       /* scale up col. 6 */
130             dst[6] = k3; /* col. 6 */
131             /* fdct_3 */
132             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
133             /* assume FAST MULTIPLY */
134             /* k0, k1 are output */
135             k0 = k4 - k6;
136 
137             k1 = mla392(k0, k14, round);
138             k0 = mla554(k4, k12, k1);
139             k1 = mla1338(k6, k14, k1);
140 
141             k4 = k0 >> FDCT_SHIFT;
142             k6 = k1 >> FDCT_SHIFT;
143             /***********************/
144             k5 = k5 + k7;
145             k7 = (k7 << 1) - k5;
146             k4 = k4 + k7;
147             k7 = (k7 << 1) - k4;
148             k5 = k5 + k6;
149             k4 <<= 1;       /* scale up col.5 */
150             k6 = k5 - (k6 << 1);
151             /********/
152             dst[5] = k4;    /* col. 5 */
153             k6 <<= 2;       /* scale up col. 7 */
154             dst[1] = k5;    /* col. 1 */
155             dst[7] = k6;    /* col. 7 */
156             dst[3] = k7;    /* col. 3 */
157             dst += 8;
158         }
159         while (dst < out);
160 
161         out -= 64;
162         dst = out + 8;
163 
164         /*  Vertical Block Loop  */
165         do  /* Vertical 8xDCT loop */
166         {
167             k0 = out[0];
168             k1 = out[8];
169             k2 = out[16];
170             k3 = out[24];
171             k4 = out[32];
172             k5 = out[40];
173             k6 = out[48];
174             k7 = out[56];
175             /* deadzone thresholding for column */
176 
177             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
178 
179             if (abs_sum < ColTh)
180             {
181                 out[0] = 0x7fff;
182                 out++;
183                 continue;
184             }
185 
186             /* fdct_1 */
187             k0 = k0 + k7;
188             k7 = k0 - (k7 << 1);
189             k1 = k1 + k6;
190             k6 = k1 - (k6 << 1);
191             k2 = k2 + k5;
192             k5 = k2 - (k5 << 1);
193             k3 = k3 + k4;
194             k4 = k3 - (k4 << 1);
195 
196             k0 = k0 + k3;
197             k3 = k0 - (k3 << 1);
198             k1 = k1 + k2;
199             k2 = k1 - (k2 << 1);
200 
201             k0 = k0 + k1;
202             k1 = k0 - (k1 << 1);
203             /**********/
204             out[32] = k1; /* row 4 */
205             out[0] = k0; /* row 0 */
206             /* fdct_2 */
207             k4 = k4 + k5;
208             k5 = k5 + k6;
209             k6 = k6 + k7;
210             k2 = k2 + k3;
211             /* MUL2C k2,k5,724,FDCT_SHIFT */
212             /* k0, k1 become scratch */
213             /* assume FAST MULTIPLY */
214             k1 = mla724(k12, k5, round);
215             k0 = mla724(k12, k2, round);
216 
217             k5 = k1 >> FDCT_SHIFT;
218             k2 = k0 >> FDCT_SHIFT;
219             /*****************/
220             k2 = k2 + k3;
221             k3 = (k3 << 1) - k2;
222             k3 <<= 1;       /* scale up col. 6 */
223             /********/
224             out[48] = k3;   /* row 6 */
225             out[16] = k2;   /* row 2 */
226             /* fdct_3 */
227             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
228             /* assume FAST MULTIPLY */
229             /* k0, k1 are output */
230             k0 = k4 - k6;
231 
232             k1 = mla392(k0, k14, round);
233             k0 = mla554(k4, k12, k1);
234             k1 = mla1338(k6, k14, k1);
235 
236             k4 = k0 >> FDCT_SHIFT;
237             k6 = k1 >> FDCT_SHIFT;
238             /***********************/
239             k5 = k5 + k7;
240             k7 = (k7 << 1) - k5;
241             k4 = k4 + k7;
242             k7 = (k7 << 1) - k4;
243             k5 = k5 + k6;
244             k4 <<= 1;       /* scale up col. 5 */
245             k6 = k5 - (k6 << 1);
246             /********/
247             out[24] = k7 ;    /* row 3 */
248             k6 <<= 2;       /* scale up col. 7 */
249             out[56] = k6 ;   /* row 7 */
250             out[8] = k5 ;    /* row 1 */
251             out[40] = k4 ;   /* row 5 */
252             out++;
253         }
254         while ((UInt)out < (UInt)dst) ;
255 
256         return ;
257     }
258 
259     /**************************************************************************/
260     /*  Function:   Block4x4DCT_AANwSub
261         Date:       7/31/01
262         Input:
263         Output:     out[64] ==> next block
264         Purpose:    Do subtraction for zero MV first before 4x4 DCT
265         Modified:
266     **************************************************************************/
267 
Block4x4DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)268     Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
269     {
270         Short *dst;
271         register Int k0, k1, k2, k3, k4, k5, k6, k7;
272         Int round;
273         Int k12 = 0x022A02D4;
274         Int k14 = 0x0188053A;
275         Int mask;
276         Int tmp, tmp2;
277         Int abs_sum;
278         Int ColTh;
279 
280         dst = out + 64 ;
281         ColTh = *dst;
282         out += 128;
283         round = 1 << (FDCT_SHIFT - 1);
284 
285         do  /* fdct_nextrow */
286         {
287             /* assuming the block is word-aligned */
288             mask = 0x1FE;
289             tmp = *((Int*) cur);    /* contains 4 pixels */
290             tmp2 = *((Int*) pred); /* prediction 4 pixels */
291             k0 = tmp2 & 0xFF;
292             k1 = mask & (tmp << 1);
293             k0 = k1 - (k0 << 1);
294             k1 = (tmp2 >> 8) & 0xFF;
295             k2 = mask & (tmp >> 7);
296             k1 = k2 - (k1 << 1);
297             k2 = (tmp2 >> 16) & 0xFF;
298             k3 = mask & (tmp >> 15);
299             k2 = k3 - (k2 << 1);
300             k3 = (tmp2 >> 24) & 0xFF;
301             k4 = mask & (tmp >> 23);
302             k3 = k4 - (k3 << 1);
303             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
304             tmp2 = *((Int*)(pred + 4));
305             k4 = tmp2 & 0xFF;
306             k5 = mask & (tmp << 1);
307             k4 = k5 - (k4 << 1);
308             k5 = (tmp2 >> 8) & 0xFF;
309             k6 = mask & (tmp >> 7);
310             k5 = k6 - (k5 << 1);
311             k6 = (tmp2 >> 16) & 0xFF;
312             k7 = mask & (tmp >> 15);
313             k6 = k7 - (k6 << 1);
314             k7 = (tmp2 >> 24) & 0xFF;
315             tmp = mask & (tmp >> 23);
316             k7 = tmp - (k7 << 1);
317             cur += width;
318             pred += 16;
319 
320             /* fdct_1 */
321             k0 = k0 + k7;
322             k7 = k0 - (k7 << 1);
323             k1 = k1 + k6;
324             k6 = k1 - (k6 << 1);
325             k2 = k2 + k5;
326             k5 = k2 - (k5 << 1);
327             k3 = k3 + k4;
328             k4 = k3 - (k4 << 1);
329 
330             k0 = k0 + k3;
331             k3 = k0 - (k3 << 1);
332             k1 = k1 + k2;
333             k2 = k1 - (k2 << 1);
334 
335             k0 = k0 + k1;
336             /**********/
337             dst[0] = k0;
338             /* fdct_2 */
339             k4 = k4 + k5;
340             k5 = k5 + k6;
341             k6 = k6 + k7;
342             k2 = k2 + k3;
343             /* MUL2C k2,k5,724,FDCT_SHIFT */
344             /* k0, k1 become scratch */
345             /* assume FAST MULTIPLY */
346             k1 = mla724(k12, k5, round);
347             k0 = mla724(k12, k2, round);
348 
349             k5 = k1 >> FDCT_SHIFT;
350             k2 = k0 >> FDCT_SHIFT;
351             /*****************/
352             k2 = k2 + k3;
353             /********/
354             dst[2] = k2;        /* col. 2 */
355             /* fdct_3 */
356             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
357             /* assume FAST MULTIPLY */
358             /* k0, k1 are output */
359             k0 = k4 - k6;
360 
361             k1 = mla392(k0, k14, round);
362             k0 = mla554(k4, k12, k1);
363             k1 = mla1338(k6, k14, k1);
364 
365             k4 = k0 >> FDCT_SHIFT;
366             k6 = k1 >> FDCT_SHIFT;
367             /***********************/
368             k5 = k5 + k7;
369             k7 = (k7 << 1) - k5;
370             k7 = k7 - k4;
371             k5 = k5 + k6;
372             /********/
373             dst[1] = k5;        /* col. 1 */
374             dst[3] = k7;        /* col. 3 */
375             dst += 8;
376         }
377         while (dst < out);
378 
379         out -= 64;
380         dst = out + 4;
381 
382         /*  Vertical Block Loop  */
383         do  /* Vertical 8xDCT loop */
384         {
385             k0 = out[0];
386             k1 = out[8];
387             k2 = out[16];
388             k3 = out[24];
389             k4 = out[32];
390             k5 = out[40];
391             k6 = out[48];
392             k7 = out[56];
393 
394             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
395 
396             if (abs_sum < ColTh)
397             {
398                 out[0] = 0x7fff;
399                 out++;
400                 continue;
401             }
402             /* fdct_1 */
403             k0 = k0 + k7;
404             k7 = k0 - (k7 << 1);
405             k1 = k1 + k6;
406             k6 = k1 - (k6 << 1);
407             k2 = k2 + k5;
408             k5 = k2 - (k5 << 1);
409             k3 = k3 + k4;
410             k4 = k3 - (k4 << 1);
411 
412             k0 = k0 + k3;
413             k3 = k0 - (k3 << 1);
414             k1 = k1 + k2;
415             k2 = k1 - (k2 << 1);
416 
417             k0 = k0 + k1;
418             /**********/
419             out[0] = k0;   /* row 0 */
420             /* fdct_2 */
421             k4 = k4 + k5;
422             k5 = k5 + k6;
423             k6 = k6 + k7;
424             k2 = k2 + k3;
425             /* MUL2C k2,k5,724,FDCT_SHIFT */
426             /* k0, k1 become scratch */
427             /* assume FAST MULTIPLY */
428             k1 = mla724(k12, k5, round);
429             k0 = mla724(k12, k2, round);
430 
431             k5 = k1 >> FDCT_SHIFT;
432             k2 = k0 >> FDCT_SHIFT;
433             /*****************/
434             k2 = k2 + k3;
435             /********/
436             out[16] = k2;           /* row 2 */
437             /* fdct_3 */
438             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
439             /* assume FAST MULTIPLY */
440             /* k0, k1 are output */
441             k0 = k4 - k6;
442 
443             k1 = mla392(k0, k14, round);
444             k0 = mla554(k4, k12, k1);
445             k1 = mla1338(k6, k14, k1);
446 
447             k4 = k0 >> FDCT_SHIFT;
448             k6 = k1 >> FDCT_SHIFT;
449             /***********************/
450             k5 = k5 + k7;
451             k7 = (k7 << 1) - k5;
452             k7 = k7 - k4 ;
453             k5 = k5 + k6;
454             /********/
455             out[24] = k7 ;      /* row 3 */
456             out[8] = k5 ;       /* row 1 */
457             out++;
458         }
459         while ((UInt)out < (UInt)dst) ;
460 
461         return ;
462     }
463 
464     /**************************************************************************/
465     /*  Function:   Block2x2DCT_AANwSub
466         Date:       7/31/01
467         Input:
468         Output:     out[64] ==> next block
469         Purpose:    Do subtraction for zero MV first before 2x2 DCT
470         Modified:
471     **************************************************************************/
472 
473 
Block2x2DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)474     Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
475     {
476         Short *dst;
477         register Int k0, k1, k2, k3, k4, k5, k6, k7;
478         Int round;
479         Int k12 = 0x022A02D4;
480         Int k14 = 0x018803B2;
481         Int mask;
482         Int tmp, tmp2;
483         Int abs_sum;
484         Int ColTh;
485 
486         dst = out + 64 ;
487         ColTh = *dst;
488         out += 128;
489         round = 1 << (FDCT_SHIFT - 1);
490 
491         do  /* fdct_nextrow */
492         {
493             /* assuming the block is word-aligned */
494             mask = 0x1FE;
495             tmp = *((Int*) cur);    /* contains 4 pixels */
496             tmp2 = *((Int*) pred); /* prediction 4 pixels */
497             k0 = tmp2 & 0xFF;
498             k1 = mask & (tmp << 1);
499             k0 = k1 - (k0 << 1);
500             k1 = (tmp2 >> 8) & 0xFF;
501             k2 = mask & (tmp >> 7);
502             k1 = k2 - (k1 << 1);
503             k2 = (tmp2 >> 16) & 0xFF;
504             k3 = mask & (tmp >> 15);
505             k2 = k3 - (k2 << 1);
506             k3 = (tmp2 >> 24) & 0xFF;
507             k4 = mask & (tmp >> 23);
508             k3 = k4 - (k3 << 1);
509             tmp = *((Int*)(cur + 4));   /* another 4 pixels */
510             tmp2 = *((Int*)(pred + 4));
511             k4 = tmp2 & 0xFF;
512             k5 = mask & (tmp << 1);
513             k4 = k5 - (k4 << 1);
514             k5 = (tmp2 >> 8) & 0xFF;
515             k6 = mask & (tmp >> 7);
516             k5 = k6 - (k5 << 1);
517             k6 = (tmp2 >> 16) & 0xFF;
518             k7 = mask & (tmp >> 15);
519             k6 = k7 - (k6 << 1);
520             k7 = (tmp2 >> 24) & 0xFF;
521             tmp = mask & (tmp >> 23);
522             k7 = tmp - (k7 << 1);
523             cur += width;
524             pred += 16;
525 
526             /* fdct_1 */
527             k0 = k0 + k7;
528             k7 = k0 - (k7 << 1);
529             k1 = k1 + k6;
530             k6 = k1 - (k6 << 1);
531             k2 = k2 + k5;
532             k5 = k2 - (k5 << 1);
533             k3 = k3 + k4;
534             k4 = k3 - (k4 << 1);
535 
536             k0 = k0 + k3;
537             k3 = k0 - (k3 << 1);
538             k1 = k1 + k2;
539             k2 = k1 - (k2 << 1);
540 
541             k0 = k0 + k1;
542             /**********/
543             dst[0] = k0;
544             /* fdct_2 */
545             k4 = k4 + k5;
546             k5 = k5 + k6;
547             k6 = k6 + k7;
548             /* MUL2C k2,k5,724,FDCT_SHIFT */
549             /* k0, k1 become scratch */
550             /* assume FAST MULTIPLY */
551             k1 = mla724(k12, k5, round);
552 
553             k5 = k1 >> FDCT_SHIFT;
554             /*****************/
555             /********/
556             /* fdct_3 */
557             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
558             /* assume FAST MULTIPLY */
559             /* k0, k1 are output */
560             k1 = mla392(k4, k14, round);
561             k1 = mla946(k6, k14, k1);
562 
563             k6 = k1 >> FDCT_SHIFT;
564             /***********************/
565             k5 = k5 + k7;
566             k5 = k5 + k6;
567             /********/
568             dst[1] = k5;
569             dst += 8;
570         }
571         while (dst < out);
572         out -= 64;
573         dst = out + 2;
574         /*  Vertical Block Loop  */
575         do  /* Vertical 8xDCT loop */
576         {
577             k0 = out[0];
578             k1 = out[8];
579             k2 = out[16];
580             k3 = out[24];
581             k4 = out[32];
582             k5 = out[40];
583             k6 = out[48];
584             k7 = out[56];
585 
586             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
587 
588             if (abs_sum < ColTh)
589             {
590                 out[0] = 0x7fff;
591                 out++;
592                 continue;
593             }
594             /* fdct_1 */
595             k0 = k0 + k7;
596             k7 = k0 - (k7 << 1);
597             k1 = k1 + k6;
598             k6 = k1 - (k6 << 1);
599             k2 = k2 + k5;
600             k5 = k2 - (k5 << 1);
601             k3 = k3 + k4;
602             k4 = k3 - (k4 << 1);
603 
604             k0 = k0 + k3;
605             k3 = k0 - (k3 << 1);
606             k1 = k1 + k2;
607             k2 = k1 - (k2 << 1);
608 
609             k0 = k0 + k1;
610             /**********/
611             out[0] = k0;        /* row 0 */
612             /* fdct_2 */
613             k4 = k4 + k5;
614             k5 = k5 + k6;
615             k6 = k6 + k7;
616             /* MUL2C k2,k5,724,FDCT_SHIFT */
617             /* k0, k1 become scratch */
618             /* assume FAST MULTIPLY */
619             k1 = mla724(k12, k5, round);
620 
621             k5 = k1 >> FDCT_SHIFT;
622             /*****************/
623             /********/
624             /* fdct_3 */
625             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
626             /* assume FAST MULTIPLY */
627             /* k0, k1 are output */
628             k1 = mla392(k4, k14, round);
629             k1 = mla946(k6, k14, k1);
630 
631             k6 = k1 >> FDCT_SHIFT;
632             /***********************/
633             k5 = k5 + k7;
634             k5 = k5 + k6;
635             /********/
636             out[8] = k5 ;       /* row 1 */
637             out++;
638         }
639         while ((UInt)out < (UInt)dst) ;
640 
641         return ;
642     }
643 
644     /**************************************************************************/
645     /*  Function:   BlockDCT_AANIntra
646         Date:       8/9/01
647         Input:      rec
648         Output:     out[64] ==> next block
649         Purpose:    Input directly from rec frame.
650         Modified:
651     **************************************************************************/
652 
BlockDCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)653     Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
654     {
655         Short *dst;
656         Int k0, k1, k2, k3, k4, k5, k6, k7;
657         Int round;
658         Int k12 = 0x022A02D4;
659         Int k14 = 0x0188053A;
660         Int abs_sum;
661         Int mask;
662         Int *curInt, tmp;
663         Int ColTh;
664 
665         OSCL_UNUSED_ARG(dummy2);
666 
667         dst = out + 64 ;
668         ColTh = *dst;
669         out += 128;
670         round = 1 << (FDCT_SHIFT - 1);
671 
672         do  /* fdct_nextrow */
673         {
674             mask = 0x1FE;
675             curInt = (Int*) cur;
676             tmp = curInt[0];    /* contains 4 pixels */
677             k0 = mask & (tmp << 1);
678             k1 = mask & (tmp >> 7);
679             k2 = mask & (tmp >> 15);
680             k3 = mask & (tmp >> 23);
681             tmp = curInt[1];    /* another 4 pixels */
682             k4 =  mask & (tmp << 1);
683             k5 =  mask & (tmp >> 7);
684             k6 =  mask & (tmp >> 15);
685             k7 =  mask & (tmp >> 23);
686             cur += width;
687             /* fdct_1 */
688             k0 = k0 + k7;
689             k7 = k0 - (k7 << 1);
690             k1 = k1 + k6;
691             k6 = k1 - (k6 << 1);
692             k2 = k2 + k5;
693             k5 = k2 - (k5 << 1);
694             k3 = k3 + k4;
695             k4 = k3 - (k4 << 1);
696 
697             k0 = k0 + k3;
698             k3 = k0 - (k3 << 1);
699             k1 = k1 + k2;
700             k2 = k1 - (k2 << 1);
701 
702             k0 = k0 + k1;
703             k1 = k0 - (k1 << 1);
704             /**********/
705             dst[0] = k0;
706             dst[4] = k1; /* col. 4 */
707             /* fdct_2 */
708             k4 = k4 + k5;
709             k5 = k5 + k6;
710             k6 = k6 + k7;
711             k2 = k2 + k3;
712             /* MUL2C k2,k5,724,FDCT_SHIFT */
713             /* k0, k1 become scratch */
714             /* assume FAST MULTIPLY */
715             k1 = mla724(k12, k5, round);
716             k0 = mla724(k12, k2, round);
717 
718             k5 = k1 >> FDCT_SHIFT;
719             k2 = k0 >> FDCT_SHIFT;
720             /*****************/
721             k2 = k2 + k3;
722             k3 = (k3 << 1) - k2;
723             /********/
724             dst[2] = k2;        /* col. 2 */
725             k3 <<= 1;       /* scale up col. 6 */
726             dst[6] = k3; /* col. 6 */
727             /* fdct_3 */
728             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
729             /* assume FAST MULTIPLY */
730             /* k0, k1 are output */
731             k0 = k4 - k6;
732 
733             k1 = mla392(k0, k14, round);
734             k0 = mla554(k4, k12, k1);
735             k1 = mla1338(k6, k14, k1);
736 
737             k4 = k0 >> FDCT_SHIFT;
738             k6 = k1 >> FDCT_SHIFT;
739             /***********************/
740             k5 = k5 + k7;
741             k7 = (k7 << 1) - k5;
742             k4 = k4 + k7;
743             k7 = (k7 << 1) - k4;
744             k5 = k5 + k6;
745             k4 <<= 1;       /* scale up col.5 */
746             k6 = k5 - (k6 << 1);
747             /********/
748             dst[5] = k4;    /* col. 5 */
749             k6 <<= 2;       /* scale up col. 7 */
750             dst[1] = k5;    /* col. 1 */
751             dst[7] = k6;    /* col. 7 */
752             dst[3] = k7;    /* col. 3 */
753             dst += 8;
754         }
755         while (dst < out);
756 
757         out -= 64;
758         dst = out + 8;
759 
760         /*  Vertical Block Loop  */
761         do  /* Vertical 8xDCT loop */
762         {
763             k0 = out[0];
764             k1 = out[8];
765             k2 = out[16];
766             k3 = out[24];
767             k4 = out[32];
768             k5 = out[40];
769             k6 = out[48];
770             k7 = out[56];
771             /* deadzone thresholding for column */
772 
773             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
774 
775             if (abs_sum < ColTh)
776             {
777                 out[0] = 0x7fff;
778                 out++;
779                 continue;
780             }
781 
782             /* fdct_1 */
783             k0 = k0 + k7;
784             k7 = k0 - (k7 << 1);
785             k1 = k1 + k6;
786             k6 = k1 - (k6 << 1);
787             k2 = k2 + k5;
788             k5 = k2 - (k5 << 1);
789             k3 = k3 + k4;
790             k4 = k3 - (k4 << 1);
791 
792             k0 = k0 + k3;
793             k3 = k0 - (k3 << 1);
794             k1 = k1 + k2;
795             k2 = k1 - (k2 << 1);
796 
797             k0 = k0 + k1;
798             k1 = k0 - (k1 << 1);
799             /**********/
800             out[32] = k1; /* row 4 */
801             out[0] = k0; /* row 0 */
802             /* fdct_2 */
803             k4 = k4 + k5;
804             k5 = k5 + k6;
805             k6 = k6 + k7;
806             k2 = k2 + k3;
807             /* MUL2C k2,k5,724,FDCT_SHIFT */
808             /* k0, k1 become scratch */
809             /* assume FAST MULTIPLY */
810             k1 = mla724(k12, k5, round);
811             k0 = mla724(k12, k2, round);
812 
813             k5 = k1 >> FDCT_SHIFT;
814             k2 = k0 >> FDCT_SHIFT;
815             /*****************/
816             k2 = k2 + k3;
817             k3 = (k3 << 1) - k2;
818             k3 <<= 1;       /* scale up col. 6 */
819             /********/
820             out[48] = k3;   /* row 6 */
821             out[16] = k2;   /* row 2 */
822             /* fdct_3 */
823             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
824             /* assume FAST MULTIPLY */
825             /* k0, k1 are output */
826             k0 = k4 - k6;
827 
828             k1 = mla392(k0, k14, round);
829             k0 = mla554(k4, k12, k1);
830             k1 = mla1338(k6, k14, k1);
831 
832             k4 = k0 >> FDCT_SHIFT;
833             k6 = k1 >> FDCT_SHIFT;
834             /***********************/
835             k5 = k5 + k7;
836             k7 = (k7 << 1) - k5;
837             k4 = k4 + k7;
838             k7 = (k7 << 1) - k4;
839             k5 = k5 + k6;
840             k4 <<= 1;       /* scale up col. 5 */
841             k6 = k5 - (k6 << 1);
842             /********/
843             out[24] = k7 ;    /* row 3 */
844             k6 <<= 2;       /* scale up col. 7 */
845             out[56] = k6 ;   /* row 7 */
846             out[8] = k5 ;    /* row 1 */
847             out[40] = k4 ;   /* row 5 */
848             out++;
849         }
850         while ((UInt)out < (UInt)dst) ;
851 
852         return ;
853     }
854 
855     /**************************************************************************/
856     /*  Function:   Block4x4DCT_AANIntra
857         Date:       8/9/01
858         Input:      prev
859         Output:     out[64] ==> next block
860         Purpose:    Input directly from prev frame. output 4x4 DCT
861         Modified:
862     **************************************************************************/
863 
Block4x4DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)864     Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
865     {
866         Short *dst;
867         register Int k0, k1, k2, k3, k4, k5, k6, k7;
868         Int round;
869         Int k12 = 0x022A02D4;
870         Int k14 = 0x0188053A;
871         Int mask;
872         Int *curInt, tmp;
873         Int abs_sum;
874         Int ColTh;
875 
876         OSCL_UNUSED_ARG(dummy2);
877 
878         dst = out + 64 ;
879         ColTh = *dst;
880         out += 128;
881         round = 1 << (FDCT_SHIFT - 1);
882 
883         do  /* fdct_nextrow */
884         {
885             mask = 0x1FE;
886             curInt = (Int*) cur;
887             tmp = curInt[0];    /* contains 4 pixels */
888             k0 = mask & (tmp << 1);
889             k1 = mask & (tmp >> 7);
890             k2 = mask & (tmp >> 15);
891             k3 = mask & (tmp >> 23);
892             tmp = curInt[1];    /* another 4 pixels */
893             k4 =  mask & (tmp << 1);
894             k5 =  mask & (tmp >> 7);
895             k6 =  mask & (tmp >> 15);
896             k7 =  mask & (tmp >> 23);
897             cur += width;
898             /* fdct_1 */
899             k0 = k0 + k7;
900             k7 = k0 - (k7 << 1);
901             k1 = k1 + k6;
902             k6 = k1 - (k6 << 1);
903             k2 = k2 + k5;
904             k5 = k2 - (k5 << 1);
905             k3 = k3 + k4;
906             k4 = k3 - (k4 << 1);
907 
908             k0 = k0 + k3;
909             k3 = k0 - (k3 << 1);
910             k1 = k1 + k2;
911             k2 = k1 - (k2 << 1);
912 
913             k0 = k0 + k1;
914             /**********/
915             dst[0] = k0;
916             /* fdct_2 */
917             k4 = k4 + k5;
918             k5 = k5 + k6;
919             k6 = k6 + k7;
920             k2 = k2 + k3;
921             /* MUL2C k2,k5,724,FDCT_SHIFT */
922             /* k0, k1 become scratch */
923             /* assume FAST MULTIPLY */
924             k1 = mla724(k12, k5, round);
925             k0 = mla724(k12, k2, round);
926 
927             k5 = k1 >> FDCT_SHIFT;
928             k2 = k0 >> FDCT_SHIFT;
929             /*****************/
930             k2 = k2 + k3;
931             /********/
932             dst[2] = k2;        /* col. 2 */
933             /* fdct_3 */
934             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
935             /* assume FAST MULTIPLY */
936             /* k0, k1 are output */
937             k0 = k4 - k6;
938 
939             k1 = mla392(k0, k14, round);
940             k0 = mla554(k4, k12, k1);
941             k1 = mla1338(k6, k14, k1);
942 
943             k4 = k0 >> FDCT_SHIFT;
944             k6 = k1 >> FDCT_SHIFT;
945             /***********************/
946             k5 = k5 + k7;
947             k7 = (k7 << 1) - k5;
948             k7 = k7 - k4;
949             k5 = k5 + k6;
950             /********/
951             dst[1] = k5;        /* col. 1 */
952             dst[3] = k7;        /* col. 3 */
953             dst += 8;
954         }
955         while (dst < out);
956 
957         out -= 64;
958         dst = out + 4;
959 
960         /*  Vertical Block Loop  */
961         do  /* Vertical 8xDCT loop */
962         {
963             k0 = out[0];
964             k1 = out[8];
965             k2 = out[16];
966             k3 = out[24];
967             k4 = out[32];
968             k5 = out[40];
969             k6 = out[48];
970             k7 = out[56];
971 
972             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
973 
974             if (abs_sum < ColTh)
975             {
976                 out[0] = 0x7fff;
977                 out++;
978                 continue;
979             }
980             /* fdct_1 */
981             k0 = k0 + k7;
982             k7 = k0 - (k7 << 1);
983             k1 = k1 + k6;
984             k6 = k1 - (k6 << 1);
985             k2 = k2 + k5;
986             k5 = k2 - (k5 << 1);
987             k3 = k3 + k4;
988             k4 = k3 - (k4 << 1);
989 
990             k0 = k0 + k3;
991             k3 = k0 - (k3 << 1);
992             k1 = k1 + k2;
993             k2 = k1 - (k2 << 1);
994 
995             k0 = k0 + k1;
996             /**********/
997             out[0] = k0;   /* row 0 */
998             /* fdct_2 */
999             k4 = k4 + k5;
1000             k5 = k5 + k6;
1001             k6 = k6 + k7;
1002             k2 = k2 + k3;
1003             /* MUL2C k2,k5,724,FDCT_SHIFT */
1004             /* k0, k1 become scratch */
1005             /* assume FAST MULTIPLY */
1006             k1 = mla724(k12, k5, round);
1007             k0 = mla724(k12, k2, round);
1008 
1009             k5 = k1 >> FDCT_SHIFT;
1010             k2 = k0 >> FDCT_SHIFT;
1011             /*****************/
1012             k2 = k2 + k3;
1013             /********/
1014             out[16] = k2;           /* row 2 */
1015             /* fdct_3 */
1016             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1017             /* assume FAST MULTIPLY */
1018             /* k0, k1 are output */
1019             k0 = k4 - k6;
1020 
1021             k1 = mla392(k0, k14, round);
1022             k0 = mla554(k4, k12, k1);
1023             k1 = mla1338(k6, k14, k1);
1024 
1025             k4 = k0 >> FDCT_SHIFT;
1026             k6 = k1 >> FDCT_SHIFT;
1027             /***********************/
1028             k5 = k5 + k7;
1029             k7 = (k7 << 1) - k5;
1030             k7 = k7 - k4 ;
1031             k5 = k5 + k6;
1032             /********/
1033             out[24] = k7 ;      /* row 3 */
1034             out[8] = k5 ;       /* row 1 */
1035             out++;
1036         }
1037         while ((UInt)out < (UInt)dst) ;
1038 
1039         return ;
1040     }
1041 
1042     /**************************************************************************/
1043     /*  Function:   Block2x2DCT_AANIntra
1044         Date:       8/9/01
1045         Input:      prev
1046         Output:     out[64] ==> next block
1047         Purpose:    Input directly from prev frame. output 2x2 DCT
1048         Modified:
1049     **************************************************************************/
1050 
Block2x2DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1051     Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
1052     {
1053         Short *dst;
1054         register Int k0, k1, k2, k3, k4, k5, k6, k7;
1055         Int round;
1056         Int k12 = 0x022A02D4;
1057         Int k14 = 0x018803B2;
1058         Int mask;
1059         Int *curInt, tmp;
1060         Int abs_sum;
1061         Int ColTh;
1062 
1063         OSCL_UNUSED_ARG(dummy2);
1064 
1065         dst = out + 64 ;
1066         ColTh = *dst;
1067         out += 128;
1068         round = 1 << (FDCT_SHIFT - 1);
1069 
1070         do  /* fdct_nextrow */
1071         {
1072             mask = 0x1FE;
1073             curInt = (Int*) cur;
1074             tmp = curInt[0];    /* contains 4 pixels */
1075             k0 = mask & (tmp << 1);
1076             k1 = mask & (tmp >> 7);
1077             k2 = mask & (tmp >> 15);
1078             k3 = mask & (tmp >> 23);
1079             tmp = curInt[1];    /* another 4 pixels */
1080             k4 =  mask & (tmp << 1);
1081             k5 =  mask & (tmp >> 7);
1082             k6 =  mask & (tmp >> 15);
1083             k7 =  mask & (tmp >> 23);
1084             cur += width;
1085 
1086             /* fdct_1 */
1087             k0 = k0 + k7;
1088             k7 = k0 - (k7 << 1);
1089             k1 = k1 + k6;
1090             k6 = k1 - (k6 << 1);
1091             k2 = k2 + k5;
1092             k5 = k2 - (k5 << 1);
1093             k3 = k3 + k4;
1094             k4 = k3 - (k4 << 1);
1095 
1096             k0 = k0 + k3;
1097             k3 = k0 - (k3 << 1);
1098             k1 = k1 + k2;
1099             k2 = k1 - (k2 << 1);
1100 
1101             k0 = k0 + k1;
1102             /**********/
1103             dst[0] = k0;
1104             /* fdct_2 */
1105             k4 = k4 + k5;
1106             k5 = k5 + k6;
1107             k6 = k6 + k7;
1108             /* MUL2C k2,k5,724,FDCT_SHIFT */
1109             /* k0, k1 become scratch */
1110             /* assume FAST MULTIPLY */
1111             k1 = mla724(k12, k5, round);
1112 
1113             k5 = k1 >> FDCT_SHIFT;
1114             /*****************/
1115             /********/
1116             /* fdct_3 */
1117             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1118             /* assume FAST MULTIPLY */
1119             /* k0, k1 are output */
1120             k1 = mla392(k4, k14, round);
1121             k1 = mla946(k6, k14, k1);
1122 
1123             k6 = k1 >> FDCT_SHIFT;
1124             /***********************/
1125             k5 = k5 + k7;
1126             k5 = k5 + k6;
1127             /********/
1128             dst[1] = k5;
1129             dst += 8;
1130         }
1131         while (dst < out);
1132         out -= 64;
1133         dst = out + 2;
1134         /*  Vertical Block Loop  */
1135         do  /* Vertical 8xDCT loop */
1136         {
1137             k0 = out[0];
1138             k1 = out[8];
1139             k2 = out[16];
1140             k3 = out[24];
1141             k4 = out[32];
1142             k5 = out[40];
1143             k6 = out[48];
1144             k7 = out[56];
1145 
1146             abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
1147 
1148             if (abs_sum < ColTh)
1149             {
1150                 out[0] = 0x7fff;
1151                 out++;
1152                 continue;
1153             }
1154             /* fdct_1 */
1155             k0 = k0 + k7;
1156             k7 = k0 - (k7 << 1);
1157             k1 = k1 + k6;
1158             k6 = k1 - (k6 << 1);
1159             k2 = k2 + k5;
1160             k5 = k2 - (k5 << 1);
1161             k3 = k3 + k4;
1162             k4 = k3 - (k4 << 1);
1163 
1164             k0 = k0 + k3;
1165             k3 = k0 - (k3 << 1);
1166             k1 = k1 + k2;
1167             k2 = k1 - (k2 << 1);
1168 
1169             k0 = k0 + k1;
1170             /**********/
1171             out[0] = k0;        /* row 0 */
1172             /* fdct_2 */
1173             k4 = k4 + k5;
1174             k5 = k5 + k6;
1175             k6 = k6 + k7;
1176             /* MUL2C k2,k5,724,FDCT_SHIFT */
1177             /* k0, k1 become scratch */
1178             /* assume FAST MULTIPLY */
1179             k1 = mla724(k12, k5, round);
1180 
1181             k5 = k1 >> FDCT_SHIFT;
1182             /*****************/
1183             /********/
1184             /* fdct_3 */
1185             /* ROTATE k4,k6,392,946, FDCT_SHIFT */
1186             /* assume FAST MULTIPLY */
1187             /* k0, k1 are output */
1188             k1 = mla392(k4, k14, round);
1189             k1 = mla946(k6, k14, k1);
1190 
1191             k6 = k1 >> FDCT_SHIFT;
1192             /***********************/
1193             k5 = k5 + k7;
1194             k5 = k5 + k6;
1195             /********/
1196             out[8] = k5 ;       /* row 1 */
1197             out++;
1198         }
1199         while ((UInt)out < (UInt)dst) ;
1200 
1201         return ;
1202     }
1203     /**************************************************************************/
1204     /*  Function:   Block1x1DCTwSub
1205         Date:       8/9/01
1206         Input:      block
1207         Output:     y
1208         Purpose:    Compute DC value only
1209         Modified:
1210     **************************************************************************/
Block1x1DCTwSub(Short * out,UChar * cur,UChar * pred,Int width)1211     void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width)
1212     {
1213         UChar *end;
1214         Int temp = 0;
1215         Int offset2;
1216 
1217         offset2 = width - 8;
1218         end = pred + (16 << 3);
1219         do
1220         {
1221             temp += (*cur++ - *pred++);
1222             temp += (*cur++ - *pred++);
1223             temp += (*cur++ - *pred++);
1224             temp += (*cur++ - *pred++);
1225             temp += (*cur++ - *pred++);
1226             temp += (*cur++ - *pred++);
1227             temp += (*cur++ - *pred++);
1228             temp += (*cur++ - *pred++);
1229             cur += offset2;
1230             pred += 8;
1231         }
1232         while (pred < end) ;
1233 
1234         out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
1235         out[0] = temp >> 3;
1236 
1237         return ;
1238     }
1239 
1240     /**************************************************************************/
1241     /*  Function:   Block1x1DCTIntra
1242         Date:       8/9/01
1243         Input:      prev
1244         Output:     out
1245         Purpose:    Compute DC value only
1246         Modified:
1247     **************************************************************************/
Block1x1DCTIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1248     void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
1249     {
1250         UChar *end;
1251         Int temp = 0;
1252         ULong word;
1253 
1254         OSCL_UNUSED_ARG(dummy2);
1255 
1256         end = cur + (width << 3);
1257         do
1258         {
1259             word = *((ULong*)cur);
1260             temp += (word >> 24);
1261             temp += ((word >> 16) & 0xFF);
1262             temp += ((word >> 8) & 0xFF);
1263             temp += (word & 0xFF);
1264 
1265             word = *((ULong*)(cur + 4));
1266             temp += (word >> 24);
1267             temp += ((word >> 16) & 0xFF);
1268             temp += ((word >> 8) & 0xFF);
1269             temp += (word & 0xFF);
1270 
1271             cur += width;
1272         }
1273         while (cur < end) ;
1274 
1275         out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
1276         out[0] = temp >> 3;
1277 
1278         return ;
1279     }
1280 
1281 #ifdef __cplusplus
1282 }
1283 #endif
1284 
1285