;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_dequant_idct_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_neon(short *input, short *dq,
;                                unsigned char *pred, unsigned char *dest,
;                                int pitch, int stride)
;
; Dequantizes a 4x4 block of 16-bit coefficients (input[i] * dq[i]),
; runs two 1-D inverse transform passes over it, rounds the result with
; (x + 4) >> 3, adds the 4x4 predictor block, saturates to unsigned
; 8-bit and stores 4 bytes per row to dest.  The 32 bytes at input are
; overwritten with zeros before returning.
;
; In:
;   r0      short *input            (16 coefficients, read then zeroed)
;   r1      short *dq               (16 dequantization factors)
;   r2      unsigned char *pred     (4 rows of 4 bytes, rows 'pitch' apart)
;   r3      unsigned char *dest     (4 rows of 4 bytes, rows 'stride' apart)
;   [sp]    int pitch
;   [sp+4]  int stride
;
; Clobbers: r1, r2, r3, r12, q0-q3, q8-q12, q14, q15.
;
; Only caller-saved NEON registers are used as scratch.  q4-q7 (d8-d15)
; are callee-saved under the AAPCS and are deliberately left untouched,
; so no stack save/restore is needed and the routine stays a leaf.
;
; Register roles inside the transform passes:
;   q1  = d2:d3    rows 0 and 2 of the working block (after vswp)
;   q2  = d4:d5    rows 1 and 3 of the working block (after vswp)
;   q3  = d6:d7    q2 scaled by the sinpi8sqrt2 constant
;   q8  = d16:d17  q2 scaled by the cospi8sqrt2minus1 constant (plus q2)
;   d18 / d19      c1 / d1
;   d20 / d21      a1 / b1
;   q12 = d24:d25  predictor rows 0-1 / rows 2-3
;   q14, q15       zeros written back to input
;-----------------------------------------------------------------------

|vp8_dequant_idct_add_neon| PROC
    vld1.16         {q8, q9}, [r0]          ; 16 input coefficients
    vld1.16         {q10, q11}, [r1]        ; 16 dequant factors
    ldr             r1, [sp]                ; pitch
    vld1.32         {d24[0]}, [r2], r1      ; pred row 0
    vld1.32         {d24[1]}, [r2], r1      ; pred row 1
    vld1.32         {d25[0]}, [r2], r1      ; pred row 2
    vld1.32         {d25[1]}, [r2]          ; pred row 3

    ldr             r1, [sp, #4]            ; stride

    adr             r12, _CONSTANTS_

    vmul.i16        q1, q8, q10             ; dequantize: input * dq
    vmul.i16        q2, q9, q11

    ; ---- first 1-D pass ----
    vld1.16         {d0}, [r12]             ; d0[0] = 0x4e7b, d0[2] = 0x8a8c
    vswp            d3, d4                  ; q1 = rows 0,2 ; q2 = rows 1,3

    ; vqdmulh yields (2*a*b) >> 16; the later >>1 and +a turn that into
    ; a + (a*b >> 16).  For d0[2] the constant 0x8a8c is negative as an
    ; s16, so adding a back compensates for the implicit -65536.
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    ; 4x4 transpose of 16-bit elements held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; memset(input, 0, 32) -- zeros prepared here, stored further down
    vmov.i16        q14, #0

    ; ---- second 1-D pass ----
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    vst1.16         {q14, q15}, [r0]        ; zero the 32-byte input block

    vrshr.s16       d2, d2, #3              ; rounding shift: (x + 4) >> 3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vaddw.u8        q1, q1, d24             ; add predictor rows 0-1
    vaddw.u8        q2, q2, d25             ; add predictor rows 2-3

    vqmovun.s16     d0, q1                  ; saturate to unsigned 8-bit
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r3], r1       ; dest row 0
    vst1.32         {d0[1]}, [r3], r1       ; dest row 1
    vst1.32         {d1[0]}, [r3], r1       ; dest row 2
    vst1.32         {d1[1]}, [r3]           ; dest row 3

    bx              lr

    ENDP           ; |vp8_dequant_idct_add_neon|

; Constant Pool
; Each 16-bit constant is duplicated in both halves of its word; the
; code above reads them as s16 lanes d0[0] and d0[2].
_CONSTANTS_
cospi8sqrt2minus1 DCD 0x4e7b4e7b
sinpi8sqrt2       DCD 0x8a8c8a8c

    END