• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*  Filename: dct_inline.h                                                      */
19 /*  Description: Implementation for in-line functions used in dct.cpp           */
20 /*  Modified:                                                                   */
21 /*********************************************************************************/
22 #ifndef _DCT_INLINE_H_
23 #define _DCT_INLINE_H_
24 
25 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4)
26 
mla724(int32 op1,int32 op2,int32 op3)27 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
28 {
29     int32 out;
30 
31     OSCL_UNUSED_ARG(op1);
32 
33     out = op2 * 724 + op3; /* op1 is not used here */
34 
35     return out;
36 }
37 
mla392(int32 k0,int32 k14,int32 round)38 __inline int32 mla392(int32 k0, int32 k14, int32 round)
39 {
40     int32 k1;
41 
42     OSCL_UNUSED_ARG(k14);
43 
44     k1 = k0 * 392 + round;
45 
46     return k1;
47 }
48 
mla554(int32 k4,int32 k12,int32 k1)49 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
50 {
51     int32 k0;
52 
53     OSCL_UNUSED_ARG(k12);
54 
55     k0 = k4 * 554 + k1;
56 
57     return k0;
58 }
59 
mla1338(int32 k6,int32 k14,int32 k1)60 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
61 {
62     int32 out;
63 
64     OSCL_UNUSED_ARG(k14);
65 
66     out = k6 * 1338 + k1;
67 
68     return out;
69 }
70 
mla946(int32 k6,int32 k14,int32 k1)71 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
72 {
73     int32 out;
74 
75     OSCL_UNUSED_ARG(k14);
76 
77     out = k6 * 946 + k1;
78 
79     return out;
80 }
81 
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)82 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
83                        int32 k4, int32 k5, int32 k6, int32 k7)
84 {
85     int32 carry, abs_sum;
86 
87     carry = k0 >> 31;
88     abs_sum = (k0 ^ carry);
89     carry = k1 >> 31;
90     abs_sum += (k1 ^ carry) - carry;
91     carry = k2 >> 31;
92     abs_sum += (k2 ^ carry) - carry;
93     carry = k3 >> 31;
94     abs_sum += (k3 ^ carry) - carry;
95     carry = k4 >> 31;
96     abs_sum += (k4 ^ carry) - carry;
97     carry = k5 >> 31;
98     abs_sum += (k5 ^ carry) - carry;
99     carry = k6 >> 31;
100     abs_sum += (k6 ^ carry) - carry;
101     carry = k7 >> 31;
102     abs_sum += (k7 ^ carry) - carry;
103 
104     return abs_sum;
105 }
106 
107 #elif defined(__CC_ARM)  /* only work with arm v5 */
108 
109 #if defined(__TARGET_ARCH_5TE)
110 
mla724(int32 op1,int32 op2,int32 op3)111 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
112 {
113     int32 out;
114 
115     __asm
116     {
117         smlabb out, op1, op2, op3
118     }
119 
120     return out;
121 }
122 
mla392(int32 k0,int32 k14,int32 round)123 __inline int32 mla392(int32 k0, int32 k14, int32 round)
124 {
125     int32 k1;
126 
127     __asm
128     {
129         smlabt k1, k0, k14, round
130     }
131 
132     return k1;
133 }
134 
mla554(int32 k4,int32 k12,int32 k1)135 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
136 {
137     int32 k0;
138 
139     __asm
140     {
141         smlabt k0, k4, k12, k1
142     }
143 
144     return k0;
145 }
146 
mla1338(int32 k6,int32 k14,int32 k1)147 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
148 {
149     int32 out;
150 
151     __asm
152     {
153         smlabb out, k6, k14, k1
154     }
155 
156     return out;
157 }
158 
mla946(int32 k6,int32 k14,int32 k1)159 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
160 {
161     int32 out;
162 
163     __asm
164     {
165         smlabb out, k6, k14, k1
166     }
167 
168     return out;
169 }
170 
171 #else // not ARM5TE
172 
173 
mla724(int32 op1,int32 op2,int32 op3)174 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
175 {
176     int32 out;
177 
178     __asm
179     {
180         and out, op2, #0xFFFF
181         mla out, op1, out, op3
182     }
183 
184     return out;
185 }
186 
mla392(int32 k0,int32 k14,int32 round)187 __inline int32 mla392(int32 k0, int32 k14, int32 round)
188 {
189     int32 k1;
190 
191     __asm
192     {
193         mov k1, k14, asr #16
194         mla k1, k0, k1, round
195     }
196 
197     return k1;
198 }
199 
mla554(int32 k4,int32 k12,int32 k1)200 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
201 {
202     int32 k0;
203 
204     __asm
205     {
206         mov  k0, k12, asr #16
207         mla k0, k4, k0, k1
208     }
209 
210     return k0;
211 }
212 
mla1338(int32 k6,int32 k14,int32 k1)213 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
214 {
215     int32 out;
216 
217     __asm
218     {
219         and out, k14, 0xFFFF
220         mla out, k6, out, k1
221     }
222 
223     return out;
224 }
225 
mla946(int32 k6,int32 k14,int32 k1)226 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
227 {
228     int32 out;
229 
230     __asm
231     {
232         and out, k14, 0xFFFF
233         mla out, k6, out, k1
234     }
235 
236     return out;
237 }
238 
239 #endif
240 
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)241 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
242                        int32 k4, int32 k5, int32 k6, int32 k7)
243 {
244     int32 carry, abs_sum;
245     __asm
246     {
247         eor     carry, k0, k0, asr #31 ;
248         eors    abs_sum, k1, k1, asr #31 ;
249         adc     abs_sum, abs_sum, carry ;
250         eors    carry,  k2, k2, asr #31 ;
251         adc     abs_sum, abs_sum, carry ;
252         eors    carry,  k3, k3, asr #31 ;
253         adc     abs_sum, abs_sum, carry ;
254         eors    carry,  k4, k4, asr #31 ;
255         adc     abs_sum, abs_sum, carry ;
256         eors    carry,  k5, k5, asr #31 ;
257         adc     abs_sum, abs_sum, carry ;
258         eors    carry,  k6, k6, asr #31 ;
259         adc     abs_sum, abs_sum, carry ;
260         eors    carry,  k7, k7, asr #31 ;
261         adc     abs_sum, abs_sum, carry ;
262     }
263 
264     return abs_sum;
265 }
266 
267 #elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) )  /* ARM GNU COMPILER  */
268 
mla724(int32 op1,int32 op2,int32 op3)269 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
270 {
271     register int32 out;
272     register int32 aa = (int32)op1;
273     register int32 bb = (int32)op2;
274     register int32 cc = (int32)op3;
275 
276     asm volatile("smlabb %0, %1, %2, %3"
277              : "=&r"(out)
278                          : "r"(aa),
279                          "r"(bb),
280                          "r"(cc));
281     return out;
282 }
283 
284 
mla392(int32 k0,int32 k14,int32 round)285 __inline int32 mla392(int32 k0, int32 k14, int32 round)
286 {
287     register int32 out;
288     register int32 aa = (int32)k0;
289     register int32 bb = (int32)k14;
290     register int32 cc = (int32)round;
291 
292     asm volatile("smlabt %0, %1, %2, %3"
293              : "=&r"(out)
294                          : "r"(aa),
295                          "r"(bb),
296                          "r"(cc));
297 
298     return out;
299 }
300 
mla554(int32 k4,int32 k12,int32 k1)301 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
302 {
303     register int32 out;
304     register int32 aa = (int32)k4;
305     register int32 bb = (int32)k12;
306     register int32 cc = (int32)k1;
307 
308     asm volatile("smlabt %0, %1, %2, %3"
309              : "=&r"(out)
310                          : "r"(aa),
311                          "r"(bb),
312                          "r"(cc));
313 
314     return out;
315 }
316 
mla1338(int32 k6,int32 k14,int32 k1)317 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
318 {
319     register int32 out;
320     register int32 aa = (int32)k6;
321     register int32 bb = (int32)k14;
322     register int32 cc = (int32)k1;
323 
324     asm volatile("smlabb %0, %1, %2, %3"
325              : "=&r"(out)
326                          : "r"(aa),
327                          "r"(bb),
328                          "r"(cc));
329     return out;
330 }
331 
mla946(int32 k6,int32 k14,int32 k1)332 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
333 {
334     register int32 out;
335     register int32 aa = (int32)k6;
336     register int32 bb = (int32)k14;
337     register int32 cc = (int32)k1;
338 
339     asm volatile("smlabb %0, %1, %2, %3"
340              : "=&r"(out)
341                          : "r"(aa),
342                          "r"(bb),
343                          "r"(cc));
344     return out;
345 }
346 
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)347 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
348                        int32 k4, int32 k5, int32 k6, int32 k7)
349 {
350     register int32 carry;
351     register int32 abs_sum;
352     register int32 aa = (int32)k0;
353     register int32 bb = (int32)k1;
354     register int32 cc = (int32)k2;
355     register int32 dd = (int32)k3;
356     register int32 ee = (int32)k4;
357     register int32 ff = (int32)k5;
358     register int32 gg = (int32)k6;
359     register int32 hh = (int32)k7;
360 
361     asm volatile("eor  %0, %2, %2, asr #31\n\t"
362                  "eors %1, %3, %3, asr #31\n\t"
363                  "adc  %1, %1, %0\n\t"
364                  "eors %0, %4, %4, asr #31\n\t"
365                  "adc  %1, %1, %0\n\t"
366                  "eors %0, %5, %5, asr #31\n\t"
367                  "adc  %1, %1, %0\n\t"
368                  "eors %0, %6, %6, asr #31\n\t"
369                  "adc  %1, %1, %0\n\t"
370                  "eors %0, %7, %7, asr #31\n\t"
371                  "adc  %1, %1, %0\n\t"
372                  "eors %0, %8, %8, asr #31\n\t"
373                  "adc  %1, %1, %0\n\t"
374                  "eors %0, %9, %9, asr #31\n\t"
375                  "adc  %1, %1, %0\n\t"
376 
377              : "=&r"(carry),
378                  "=&r"(abs_sum):
379                          "r"(aa),
380                          "r"(bb),
381                          "r"(cc),
382                          "r"(dd),
383                          "r"(ee),
384                          "r"(ff),
385                          "r"(gg),
386                          "r"(hh));
387 
388     return abs_sum;
389 }
390 
391 #endif // Diff. OS
392 
393 #endif //_DCT_INLINE_H_
394 
395 
396