;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

;  Syntax: ARM armasm (RVCT).  Target: 32-bit ARM with NEON.
;
;  Register discipline for every routine in this file:
;    - arguments arrive in r0-r3, with the fifth argument at [sp];
;    - r12 is the only core scratch register written;
;    - only d0-d7 and d16-d31 (q0-q3, q8-q15) are written.  d8-d15
;      (q4-q7) are deliberately left untouched because the procedure
;      call standard requires a callee to preserve them, and these
;      routines do not use the stack to save anything.

    EXPORT |vp8_subtract_b_neon_func|
    EXPORT |vp8_subtract_mby_neon|
    EXPORT |vp8_subtract_mbuv_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;=========================================
;void vp8_subtract_b_neon_func(short *diff, unsigned char *src,
;                              unsigned char *pred, int stride, int pitch);
;
; Writes diff = src - pred for four rows, four 16-bit values per row.
;
; In:    r0   = diff  (output; advanced by 2*pitch bytes per row)
;        r1   = src   (advanced by stride bytes per row)
;        r2   = pred  (advanced by pitch bytes per row)
;        r3   = stride
;        [sp] = pitch
; Clobb: r0-r2, r12, d0-d7, q10-q13
; Note:  each load reads 8 bytes per row although only the first four
;        differences of each row are stored.
|vp8_subtract_b_neon_func| PROC
    ldr             r12, [sp]               ;load pitch

    vld1.8          {d0}, [r1], r3          ;load src
    vld1.8          {d1}, [r2], r12         ;load pred
    vld1.8          {d2}, [r1], r3
    vld1.8          {d3}, [r2], r12
    vld1.8          {d4}, [r1], r3
    vld1.8          {d5}, [r2], r12
    vld1.8          {d6}, [r1], r3
    vld1.8          {d7}, [r2], r12

    vsubl.u8        q10, d0, d1             ;widen u8 -> 16 bit: src - pred
    vsubl.u8        q11, d2, d3
    vsubl.u8        q12, d4, d5
    vsubl.u8        q13, d6, d7

    mov             r12, r12, lsl #1        ;diff row step in bytes (pitch shorts)

    vst1.16         {d20}, [r0], r12        ;store diff (low 4 lanes of each row)
    vst1.16         {d22}, [r0], r12
    vst1.16         {d24}, [r0], r12
    vst1.16         {d26}, [r0], r12

    bx              lr
    ENDP

;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src,
;                           unsigned char *pred, int stride)
;
; Writes diff = src - pred for 16 rows of 16 pixels.
;
; In:    r0 = diff  (output; written contiguously, 16 shorts per row)
;        r1 = src   (advanced by stride bytes per row)
;        r2 = pred  (read contiguously, 16 bytes per row)
;        r3 = stride
; Clobb: r0-r2, r12, flags, q0-q3, q8-q15
;
; Each loop iteration handles four rows as two 2-row halves that reuse
; q0-q3 as load registers, so q4-q7 (d8-d15, callee-saved) are never
; written.
|vp8_subtract_mby_neon| PROC
    mov             r12, #4                 ;4 iterations x 4 rows = 16 rows

subtract_mby_loop
    ; rows 0-1 of this group
    vld1.8          {q0}, [r1], r3          ;load src
    vld1.8          {q1}, [r2]!             ;load pred
    vld1.8          {q2}, [r1], r3
    vld1.8          {q3}, [r2]!

    vsubl.u8        q8, d0, d2              ;row 0, pixels 0-7
    vsubl.u8        q9, d1, d3              ;row 0, pixels 8-15
    vsubl.u8        q10, d4, d6             ;row 1, pixels 0-7
    vsubl.u8        q11, d5, d7             ;row 1, pixels 8-15

    ; rows 2-3 of this group (q0-q3 are free again after the subtracts)
    vld1.8          {q0}, [r1], r3          ;load src
    vld1.8          {q1}, [r2]!             ;load pred
    vld1.8          {q2}, [r1], r3
    vld1.8          {q3}, [r2]!

    vsubl.u8        q12, d0, d2             ;row 2, pixels 0-7
    vsubl.u8        q13, d1, d3             ;row 2, pixels 8-15
    vsubl.u8        q14, d4, d6             ;row 3, pixels 0-7
    vsubl.u8        q15, d5, d7             ;row 3, pixels 8-15

    vst1.16         {q8}, [r0]!             ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

    subs            r12, r12, #1
    bne             subtract_mby_loop

    bx              lr
    ENDP

;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc,
;                            unsigned char *vsrc, unsigned char *pred,
;                            int stride)
;
; Writes diff = src - pred for the 8x8 U plane, then the 8x8 V plane.
;
; In:    r0   = diff  (output; U starts at diff + 256 shorts, V follows
;                      immediately after the 64 U values)
;        r1   = usrc  (advanced by stride bytes per row)
;        r2   = vsrc  (advanced by stride bytes per row)
;        r3   = pred  (U starts at pred + 256 bytes, 8 bytes per row;
;                      V follows immediately after the 64 U bytes)
;        [sp] = stride
; Clobb: r0-r3, r12, d0-d7, q8-q15
;
; Each plane is loaded in two 4-row halves that reuse d0-d7, so d8-d15
; (callee-saved) are never written.
|vp8_subtract_mbuv_neon| PROC
    ldr             r12, [sp]               ;load stride

;u
    add             r0, r0, #512            ;short *udiff = diff + 256;
    add             r3, r3, #256            ;unsigned char *upred = pred + 256;

    ; u rows 0-3
    vld1.8          {d0}, [r1], r12         ;load src
    vld1.8          {d1}, [r3]!             ;load pred
    vld1.8          {d2}, [r1], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r1], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r1], r12
    vld1.8          {d7}, [r3]!

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7

    ; u rows 4-7
    vld1.8          {d0}, [r1], r12         ;load src
    vld1.8          {d1}, [r3]!             ;load pred
    vld1.8          {d2}, [r1], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r1], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r1], r12
    vld1.8          {d7}, [r3]!

    vsubl.u8        q12, d0, d1
    vsubl.u8        q13, d2, d3
    vsubl.u8        q14, d4, d5
    vsubl.u8        q15, d6, d7

    vst1.16         {q8}, [r0]!             ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

;v
    ; r0 and r3 already point at the V diff / V pred data here

    ; v rows 0-3
    vld1.8          {d0}, [r2], r12         ;load src
    vld1.8          {d1}, [r3]!             ;load pred
    vld1.8          {d2}, [r2], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r2], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r2], r12
    vld1.8          {d7}, [r3]!

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7

    ; v rows 4-7
    vld1.8          {d0}, [r2], r12         ;load src
    vld1.8          {d1}, [r3]!             ;load pred
    vld1.8          {d2}, [r2], r12
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r2], r12
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r2], r12
    vld1.8          {d7}, [r3]!

    vsubl.u8        q12, d0, d1
    vsubl.u8        q13, d2, d3
    vsubl.u8        q14, d4, d5
    vsubl.u8        q15, d6, d7

    vst1.16         {q8}, [r0]!             ;store diff
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q12}, [r0]!
    vst1.16         {q13}, [r0]!
    vst1.16         {q14}, [r0]!
    vst1.16         {q15}, [r0]!

    bx              lr
    ENDP

    END