;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_walsh4x4_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_short_walsh4x4_neon(short *input, short *output, int pitch)
;   (NEON counterpart of vp8_short_walsh4x4_c, which has the same
;    argument list.)
;
; 4x4 Walsh-Hadamard style transform on 16-bit samples: a horizontal
; butterfly pass over each row, a vertical butterfly pass over each
; column, then every coefficient x becomes (x + (x > 0)) >> 1.
;
; ABI:   AAPCS (ARM mode)
; In:    r0 = input   - first of four rows, four 16-bit values per row
;        r1 = output  - receives 16 contiguous 16-bit coefficients
;        r2 = pitch   - distance between input rows, in BYTES (it is
;                       added directly to r0 after each row load)
; Out:   output[0..15] written in row-major order
; Clobb: r0, q1-q3 (d2-d7), q8 (d16-d17)
;        Only caller-saved NEON registers are used as scratch; d8-d15
;        are callee-saved under AAPCS and are deliberately not touched.
;-----------------------------------------------------------------------
|vp8_short_walsh4x4_neon| PROC
    vld1.16         {d2}, [r0], r2      ;row 0, then r0 += pitch
    vld1.16         {d3}, [r0], r2      ;row 1
    vld1.16         {d4}, [r0], r2      ;row 2
    vld1.16         {d5}, [r0], r2      ;row 3

    ;First for-loop (horizontal pass)
    ;transpose d2, d3, d4, d5. Then, d2=ip[0], d3=ip[1], d4=ip[2], d5=ip[3]
    ;(lane i of each register belongs to input row i)
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vadd.s16        d6, d2, d5          ;a1 = ip[0]+ip[3]
    vadd.s16        d7, d3, d4          ;b1 = ip[1]+ip[2]
    vsub.s16        d16, d3, d4         ;c1 = ip[1]-ip[2]
    vsub.s16        d17, d2, d5         ;d1 = ip[0]-ip[3]

    vadd.s16        d2, d6, d7          ;op[0] = a1 + b1
    vsub.s16        d4, d6, d7          ;op[2] = a1 - b1
    vadd.s16        d3, d16, d17        ;op[1] = c1 + d1
    vsub.s16        d5, d17, d16        ;op[3] = d1 - c1

    ;Second for-loop (vertical pass)
    ;transpose d2, d3, d4, d5. Then, d2=ip[0], d3=ip[4], d4=ip[8], d5=ip[12]
    ;(each register now holds one full row of the intermediate block)
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vadd.s16        d6, d2, d5          ;a1 = ip[0]+ip[12]
    vadd.s16        d7, d3, d4          ;b1 = ip[4]+ip[8]
    vsub.s16        d16, d3, d4         ;c1 = ip[4]-ip[8]
    vsub.s16        d17, d2, d5         ;d1 = ip[0]-ip[12]

    vadd.s16        d2, d6, d7          ;a2 = a1 + b1;
    vsub.s16        d4, d6, d7          ;c2 = a1 - b1;
    vadd.s16        d3, d16, d17        ;b2 = c1 + d1;
    vsub.s16        d5, d17, d16        ;d2 = d1 - c1;

    ;x += (x > 0): vcgt writes all-ones (-1) into lanes that are > 0,
    ;so subtracting the mask adds 1 to exactly the positive lanes.
    vcgt.s16        q3, q1, #0
    vcgt.s16        q8, q2, #0

    vsub.s16        q1, q1, q3
    vsub.s16        q2, q2, q8

    vshr.s16        q1, q1, #1          ;arithmetic >> 1
    vshr.s16        q2, q2, #1

    vst1.16         {q1, q2}, [r1]      ;op[0..7] = q1, op[8..15] = q2

    bx              lr

    ENDP

    END