1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /* Filename: dct_inline.h */
19 /* Description: Implementation for in-line functions used in dct.cpp */
20 /* Modified: */
21 /*********************************************************************************/
22 #ifndef _DCT_INLINE_H_
23 #define _DCT_INLINE_H_
24
25 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4)
26
mla724(int32 op1,int32 op2,int32 op3)27 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
28 {
29 int32 out;
30
31 OSCL_UNUSED_ARG(op1);
32
33 out = op2 * 724 + op3; /* op1 is not used here */
34
35 return out;
36 }
37
mla392(int32 k0,int32 k14,int32 round)38 __inline int32 mla392(int32 k0, int32 k14, int32 round)
39 {
40 int32 k1;
41
42 OSCL_UNUSED_ARG(k14);
43
44 k1 = k0 * 392 + round;
45
46 return k1;
47 }
48
mla554(int32 k4,int32 k12,int32 k1)49 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
50 {
51 int32 k0;
52
53 OSCL_UNUSED_ARG(k12);
54
55 k0 = k4 * 554 + k1;
56
57 return k0;
58 }
59
mla1338(int32 k6,int32 k14,int32 k1)60 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
61 {
62 int32 out;
63
64 OSCL_UNUSED_ARG(k14);
65
66 out = k6 * 1338 + k1;
67
68 return out;
69 }
70
mla946(int32 k6,int32 k14,int32 k1)71 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
72 {
73 int32 out;
74
75 OSCL_UNUSED_ARG(k14);
76
77 out = k6 * 946 + k1;
78
79 return out;
80 }
81
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)82 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
83 int32 k4, int32 k5, int32 k6, int32 k7)
84 {
85 int32 carry, abs_sum;
86
87 carry = k0 >> 31;
88 abs_sum = (k0 ^ carry);
89 carry = k1 >> 31;
90 abs_sum += (k1 ^ carry) - carry;
91 carry = k2 >> 31;
92 abs_sum += (k2 ^ carry) - carry;
93 carry = k3 >> 31;
94 abs_sum += (k3 ^ carry) - carry;
95 carry = k4 >> 31;
96 abs_sum += (k4 ^ carry) - carry;
97 carry = k5 >> 31;
98 abs_sum += (k5 ^ carry) - carry;
99 carry = k6 >> 31;
100 abs_sum += (k6 ^ carry) - carry;
101 carry = k7 >> 31;
102 abs_sum += (k7 ^ carry) - carry;
103
104 return abs_sum;
105 }
106
107 #elif defined(__CC_ARM) /* only work with arm v5 */
108
109 #if defined(__TARGET_ARCH_5TE)
110
mla724(int32 op1,int32 op2,int32 op3)111 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
112 {
113 int32 out;
114
115 __asm
116 {
117 smlabb out, op1, op2, op3
118 }
119
120 return out;
121 }
122
mla392(int32 k0,int32 k14,int32 round)123 __inline int32 mla392(int32 k0, int32 k14, int32 round)
124 {
125 int32 k1;
126
127 __asm
128 {
129 smlabt k1, k0, k14, round
130 }
131
132 return k1;
133 }
134
mla554(int32 k4,int32 k12,int32 k1)135 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
136 {
137 int32 k0;
138
139 __asm
140 {
141 smlabt k0, k4, k12, k1
142 }
143
144 return k0;
145 }
146
mla1338(int32 k6,int32 k14,int32 k1)147 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
148 {
149 int32 out;
150
151 __asm
152 {
153 smlabb out, k6, k14, k1
154 }
155
156 return out;
157 }
158
mla946(int32 k6,int32 k14,int32 k1)159 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
160 {
161 int32 out;
162
163 __asm
164 {
165 smlabb out, k6, k14, k1
166 }
167
168 return out;
169 }
170
171 #else // not ARM5TE
172
173
mla724(int32 op1,int32 op2,int32 op3)174 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
175 {
176 int32 out;
177
178 __asm
179 {
180 and out, op2, #0xFFFF
181 mla out, op1, out, op3
182 }
183
184 return out;
185 }
186
mla392(int32 k0,int32 k14,int32 round)187 __inline int32 mla392(int32 k0, int32 k14, int32 round)
188 {
189 int32 k1;
190
191 __asm
192 {
193 mov k1, k14, asr #16
194 mla k1, k0, k1, round
195 }
196
197 return k1;
198 }
199
mla554(int32 k4,int32 k12,int32 k1)200 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
201 {
202 int32 k0;
203
204 __asm
205 {
206 mov k0, k12, asr #16
207 mla k0, k4, k0, k1
208 }
209
210 return k0;
211 }
212
mla1338(int32 k6,int32 k14,int32 k1)213 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
214 {
215 int32 out;
216
217 __asm
218 {
219 and out, k14, 0xFFFF
220 mla out, k6, out, k1
221 }
222
223 return out;
224 }
225
mla946(int32 k6,int32 k14,int32 k1)226 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
227 {
228 int32 out;
229
230 __asm
231 {
232 and out, k14, 0xFFFF
233 mla out, k6, out, k1
234 }
235
236 return out;
237 }
238
239 #endif
240
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)241 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
242 int32 k4, int32 k5, int32 k6, int32 k7)
243 {
244 int32 carry, abs_sum;
245 __asm
246 {
247 eor carry, k0, k0, asr #31 ;
248 eors abs_sum, k1, k1, asr #31 ;
249 adc abs_sum, abs_sum, carry ;
250 eors carry, k2, k2, asr #31 ;
251 adc abs_sum, abs_sum, carry ;
252 eors carry, k3, k3, asr #31 ;
253 adc abs_sum, abs_sum, carry ;
254 eors carry, k4, k4, asr #31 ;
255 adc abs_sum, abs_sum, carry ;
256 eors carry, k5, k5, asr #31 ;
257 adc abs_sum, abs_sum, carry ;
258 eors carry, k6, k6, asr #31 ;
259 adc abs_sum, abs_sum, carry ;
260 eors carry, k7, k7, asr #31 ;
261 adc abs_sum, abs_sum, carry ;
262 }
263
264 return abs_sum;
265 }
266
267 #elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) ) /* ARM GNU COMPILER */
268
mla724(int32 op1,int32 op2,int32 op3)269 __inline int32 mla724(int32 op1, int32 op2, int32 op3)
270 {
271 register int32 out;
272 register int32 aa = (int32)op1;
273 register int32 bb = (int32)op2;
274 register int32 cc = (int32)op3;
275
276 asm volatile("smlabb %0, %1, %2, %3"
277 : "=&r"(out)
278 : "r"(aa),
279 "r"(bb),
280 "r"(cc));
281 return out;
282 }
283
284
mla392(int32 k0,int32 k14,int32 round)285 __inline int32 mla392(int32 k0, int32 k14, int32 round)
286 {
287 register int32 out;
288 register int32 aa = (int32)k0;
289 register int32 bb = (int32)k14;
290 register int32 cc = (int32)round;
291
292 asm volatile("smlabt %0, %1, %2, %3"
293 : "=&r"(out)
294 : "r"(aa),
295 "r"(bb),
296 "r"(cc));
297
298 return out;
299 }
300
mla554(int32 k4,int32 k12,int32 k1)301 __inline int32 mla554(int32 k4, int32 k12, int32 k1)
302 {
303 register int32 out;
304 register int32 aa = (int32)k4;
305 register int32 bb = (int32)k12;
306 register int32 cc = (int32)k1;
307
308 asm volatile("smlabt %0, %1, %2, %3"
309 : "=&r"(out)
310 : "r"(aa),
311 "r"(bb),
312 "r"(cc));
313
314 return out;
315 }
316
mla1338(int32 k6,int32 k14,int32 k1)317 __inline int32 mla1338(int32 k6, int32 k14, int32 k1)
318 {
319 register int32 out;
320 register int32 aa = (int32)k6;
321 register int32 bb = (int32)k14;
322 register int32 cc = (int32)k1;
323
324 asm volatile("smlabb %0, %1, %2, %3"
325 : "=&r"(out)
326 : "r"(aa),
327 "r"(bb),
328 "r"(cc));
329 return out;
330 }
331
mla946(int32 k6,int32 k14,int32 k1)332 __inline int32 mla946(int32 k6, int32 k14, int32 k1)
333 {
334 register int32 out;
335 register int32 aa = (int32)k6;
336 register int32 bb = (int32)k14;
337 register int32 cc = (int32)k1;
338
339 asm volatile("smlabb %0, %1, %2, %3"
340 : "=&r"(out)
341 : "r"(aa),
342 "r"(bb),
343 "r"(cc));
344 return out;
345 }
346
sum_abs(int32 k0,int32 k1,int32 k2,int32 k3,int32 k4,int32 k5,int32 k6,int32 k7)347 __inline int32 sum_abs(int32 k0, int32 k1, int32 k2, int32 k3,
348 int32 k4, int32 k5, int32 k6, int32 k7)
349 {
350 register int32 carry;
351 register int32 abs_sum;
352 register int32 aa = (int32)k0;
353 register int32 bb = (int32)k1;
354 register int32 cc = (int32)k2;
355 register int32 dd = (int32)k3;
356 register int32 ee = (int32)k4;
357 register int32 ff = (int32)k5;
358 register int32 gg = (int32)k6;
359 register int32 hh = (int32)k7;
360
361 asm volatile("eor %0, %2, %2, asr #31\n\t"
362 "eors %1, %3, %3, asr #31\n\t"
363 "adc %1, %1, %0\n\t"
364 "eors %0, %4, %4, asr #31\n\t"
365 "adc %1, %1, %0\n\t"
366 "eors %0, %5, %5, asr #31\n\t"
367 "adc %1, %1, %0\n\t"
368 "eors %0, %6, %6, asr #31\n\t"
369 "adc %1, %1, %0\n\t"
370 "eors %0, %7, %7, asr #31\n\t"
371 "adc %1, %1, %0\n\t"
372 "eors %0, %8, %8, asr #31\n\t"
373 "adc %1, %1, %0\n\t"
374 "eors %0, %9, %9, asr #31\n\t"
375 "adc %1, %1, %0\n\t"
376
377 : "=&r"(carry),
378 "=&r"(abs_sum):
379 "r"(aa),
380 "r"(bb),
381 "r"(cc),
382 "r"(dd),
383 "r"(ee),
384 "r"(ff),
385 "r"(gg),
386 "r"(hh));
387
388 return abs_sum;
389 }
390
391 #endif // Diff. OS
392
393 #endif //_DCT_INLINE_H_
394
395
396