/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * Register conventions shared by every routine in this file, as used by
 * the code below:
 *   r0   on entry: not read by any routine; on exit: the result
 *   r1   pointer to the first block  (read only)
 *   r2   pointer to the second block (read only)
 *   r3   byte offset added to r1 and r2 to step to the next row
 *   [sp] on entry: number of rows to process
 * NOTE(review): this presumably matches a C prototype of the form
 * (ctx, blk1, blk2, stride, h) -- confirm against the C declaration.
 */

/*
 * ff_pix_abs16_armv6
 * Sum of absolute byte differences between 16-byte-wide rows of r1 and r2.
 *
 * The row of r1 is fetched with ldm, so r1 (and r1 + n * r3) has to satisfy
 * the alignment requirement of ldm; r2 is read with single-word ldr only.
 * The loop exits only when the counter reaches exactly zero (beq), so the
 * row count must be at least 1.
 * Two partial sums (r12 and lr) are kept and added at the end; presumably
 * this shortens the dependency chain between back-to-back usada8.
 */
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]               /* r0 = row counter (read before push moves sp) */
        push            {r4-r9, lr}
        mov             r12, #0                 /* partial sum A */
        mov             lr,  #0                 /* partial sum B */
        ldm             r1,  {r4-r7}            /* r4-r7 = 16 bytes of the current r1 row */
        ldr             r8,  [r2]               /* word 0 of the current r2 row */
1:
        ldr             r9,  [r2, #4]           /* word 1 of r2 row */
        pld             [r1, r3]                /* preload hint for the next r1 row */
        usada8          r12, r4,  r8,  r12      /* A += sum |r1.w0 bytes - r2.w0 bytes| */
        ldr             r8,  [r2, #8]           /* word 2 of r2 row */
        pld             [r2, r3]                /* preload hint for the next r2 row */
        usada8          lr,  r5,  r9,  lr       /* B += SAD of word 1 */
        ldr             r9,  [r2, #12]          /* word 3 of r2 row */
        usada8          r12, r6,  r8,  r12      /* A += SAD of word 2 */
        subs            r0,  r0,  #1            /* one row done; Z set when none remain */
        usada8          lr,  r7,  r9,  lr       /* B += SAD of word 3 (leaves Z untouched) */
        beq             2f                      /* last row: skip the loads for a next row */
        add             r1,  r1,  r3            /* advance r1 to the next row */
        ldm             r1,  {r4-r7}
        add             r2,  r2,  r3            /* advance r2 to the next row */
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr            /* result = A + B */
        pop             {r4-r9, pc}
endfunc

/*
 * ff_pix_abs16_x2_armv6
 * Like a 16-byte-wide SAD, but each byte of the r2 row is first replaced by
 * the rounded-up average of itself and the byte that follows it in memory:
 *     avg = (a + b + 1) >> 1
 * computed per byte as uhadd8(a, b) + ((a ^ b) & 1): uhadd8 yields
 * (a + b) >> 1 and the masked eor restores the bit that the halving dropped.
 *
 * The "following byte" word is built with lsr #8 / orr lsl #24 from two
 * adjacent words, which assumes little-endian byte order in the loaded
 * registers. Seventeen bytes are read from each r2 row (ldrb at offset 16).
 *
 * lr holds 0x01010101 for the whole routine; r12 is the row counter.
 * The loop continues while the counter stays greater than zero (bgt).
 */
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]               /* r12 = row counter */
        push            {r4-r11, lr}
        mov             r0,  #0                 /* running sum */
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8   /* lr = 0x00000101 */
        orr             lr,  lr,  lr,  lsl #16  /* lr = 0x01010101: low bit of every byte */
1:
        ldr             r8,  [r2]               /* r2 bytes 0-3 */
        ldr             r9,  [r2, #4]           /* r2 bytes 4-7 */
        lsr             r10, r8,  #8
        ldr             r4,  [r1]               /* r1 bytes 0-3 */
        lsr             r6,  r9,  #8
        orr             r10, r10, r9,  lsl #24  /* r10 = r2 bytes 1-4 */
        ldr             r5,  [r2, #8]           /* r2 bytes 8-11 */
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10           /* per byte (a + b) >> 1 */
        orr             r6,  r6,  r5,  lsl #24  /* r6 = r2 bytes 5-8 */
        and             r11, r11, lr            /* per byte (a ^ b) & 1 */
        uadd8           r7,  r7,  r11           /* r7 = rounded average for bytes 0-3 */
        ldr             r8,  [r1, #4]           /* r1 bytes 4-7 */
        usada8          r0,  r4,  r7,  r0       /* sum += SAD of bytes 0-3 */
        eor             r7,  r9,  r6
        lsr             r10, r5,  #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]          /* r2 bytes 12-15 */
        uadd8           r4,  r4,  r7            /* r4 = rounded average for bytes 4-7 */
        pld             [r1, r3]                /* preload hint for the next r1 row */
        orr             r10, r10, r6,  lsl #24  /* r10 = r2 bytes 9-12 */
        usada8          r0,  r8,  r4,  r0       /* sum += SAD of bytes 4-7 */
        ldr             r4,  [r1, #8]           /* r1 bytes 8-11 */
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]          /* r2 byte 16: right neighbour of byte 15 */
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]          /* r1 bytes 12-15 */
        uadd8           r8,  r8,  r11           /* r8 = rounded average for bytes 8-11 */
        pld             [r2, r3]                /* preload hint for the next r2 row */
        lsr             r10, r6,  #8
        usada8          r0,  r4,  r8,  r0       /* sum += SAD of bytes 8-11 */
        orr             r10, r10, r7,  lsl #24  /* r10 = r2 bytes 13-16 */
        subs            r12, r12, #1            /* flags for bgt; nothing below changes N, Z, C or V */
        eor             r11, r6,  r10
        add             r1,  r1,  r3            /* advance r1 to the next row */
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11           /* r9 = rounded average for bytes 12-15 */
        add             r2,  r2,  r3            /* advance r2 to the next row */
        usada8          r0,  r5,  r9,  r0       /* sum += SAD of bytes 12-15 */
        bgt             1b

        pop             {r4-r11, pc}
endfunc

/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 * One 16-byte row of the vertically averaged SAD.
 *
 * On entry p0-p3 hold the four words of the r2 row loaded previously, r2
 * points at the following r2 row, r1 points at the r1 row to compare, lr
 * holds the per-byte low-bit mask set up by the caller and r0 is the
 * running sum.
 *
 * For each word the macro forms the rounded-up per-byte average of the
 * previous and the newly loaded r2 row (uhadd8 plus the masked eor bit,
 * i.e. (a + b + 1) >> 1) and accumulates its SAD against r1 into r0.
 *
 * On exit n0-n3 hold the four words of the r2 row just loaded, so the next
 * invocation can pass them as p0-p3; p0-p3 are clobbered (also used as
 * scratch for r1 data). r1 and r2 are advanced by r3. No instruction in the
 * macro updates the N, Z, C or V flags.
 */
.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
        ldr             \n0, [r2]               /* new r2 row, word 0 */
        eor             \n1, \p0, \n0           /* n1 is scratch until reloaded below */
        uhadd8          \p0, \p0, \n0
        and             \n1, \n1, lr
        ldr             \n2, [r1]               /* r1 word 0 (n2 is scratch until reloaded) */
        uadd8           \p0, \p0, \n1           /* rounded average, word 0 */
        ldr             \n1, [r2, #4]           /* new r2 row, word 1 */
        usada8          r0,  \p0, \n2, r0
        pld             [r1, r3]                /* preload hint for the next r1 row */
        eor             \n3, \p1, \n1           /* n3 is scratch until reloaded below */
        uhadd8          \p1, \p1, \n1
        and             \n3, \n3, lr
        ldr             \p0, [r1, #4]           /* r1 word 1 */
        uadd8           \p1, \p1, \n3           /* rounded average, word 1 */
        ldr             \n2, [r2, #8]           /* new r2 row, word 2 */
        usada8          r0,  \p1, \p0, r0
        pld             [r2, r3]                /* preload hint for the next r2 row */
        eor             \p0, \p2, \n2
        uhadd8          \p2, \p2, \n2
        and             \p0, \p0, lr
        ldr             \p1, [r1, #8]           /* r1 word 2 */
        uadd8           \p2, \p2, \p0           /* rounded average, word 2 */
        ldr             \n3, [r2, #12]          /* new r2 row, word 3 */
        usada8          r0,  \p2, \p1, r0
        eor             \p1, \p3, \n3
        uhadd8          \p3, \p3, \n3
        and             \p1, \p1, lr
        ldr             \p0, [r1, #12]          /* r1 word 3 */
        uadd8           \p3, \p3, \p1           /* rounded average, word 3 */
        add             r1,  r1,  r3            /* advance r1 to the next row */
        usada8          r0,  \p3, \p0, r0
        add             r2,  r2,  r3            /* advance r2 to the next row */
.endm

/*
 * ff_pix_abs16_y2_armv6
 * 16-byte-wide SAD of r1 against the rounded-up average of each r2 row and
 * the r2 row that follows it (one more r2 row is read than r1 rows).
 *
 * Each loop iteration expands usad_y2 twice with the two register groups
 * swapped, so two rows are handled per iteration and the counter is
 * decremented by 2; the loop continues while it stays greater than zero.
 */
function ff_pix_abs16_y2_armv6, export=1
        pld             [r1]
        pld             [r2]
        ldr             r12, [sp]               /* r12 = row counter */
        push            {r4-r11, lr}
        mov             r0,  #0                 /* running sum */
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  /* lr = 0x01010101: low bit of every byte */
        ldr             r4,  [r2]               /* r4-r7 = first r2 row */
        ldr             r5,  [r2, #4]
        ldr             r6,  [r2, #8]
        ldr             r7,  [r2, #12]
        add             r2,  r2,  r3            /* r2 now points at the second r2 row */
1:
        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
        subs            r12, r12, #2            /* flags survive the macro below */
        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
        bgt             1b

        pop             {r4-r11, pc}
endfunc

/*
 * ff_pix_abs8_armv6
 * Sum of absolute byte differences between 8-byte-wide rows of r1 and r2.
 *
 * Two rows are handled per loop iteration; the r1 row for the following
 * iteration is loaded ahead of its use. The loop exits only when the
 * counter reaches exactly zero (beq after subs #2), so the row count must
 * be a positive even number.
 *
 * ldrd_post and ldr_post are macros provided by the included asm.S.
 * NOTE(review): they appear to be post-indexed loads that also advance the
 * base register by the last operand -- confirm against asm.S.
 */
function ff_pix_abs8_armv6, export=1
        pld             [r2, r3]
        ldr             r12, [sp]               /* r12 = row counter */
        push            {r4-r9, lr}
        mov             r0,  #0                 /* partial sum for the low words */
        mov             lr,  #0                 /* partial sum for the high words */
        ldrd_post       r4,  r5,  r1,  r3       /* r4:r5 = first r1 row */
1:
        subs            r12, r12, #2            /* Z is consumed by beq below */
        ldr             r7,  [r2, #4]           /* r2 row, high word */
        ldr_post        r6,  r2,  r3            /* r2 row, low word */
        ldrd_post       r8,  r9,  r1,  r3       /* r8:r9 = second r1 row of this pair */
        usada8          r0,  r4,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r5,  r7,  lr
        ldr             r7,  [r2, #4]           /* second r2 row of this pair */
        ldr_post        r6,  r2,  r3
        beq             2f                      /* no further rows: finish without loading more */
        ldrd_post       r4,  r5,  r1,  r3       /* r4:r5 = first r1 row of the next pair */
        usada8          r0,  r8,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r9,  r7,  lr
        b               1b
2:
        usada8          r0,  r8,  r6,  r0       /* account for the final row */
        usada8          lr,  r9,  r7,  lr
        add             r0,  r0,  lr            /* result = sum of both partial sums */
        pop             {r4-r9, pc}
endfunc

/*
 * ff_sse16_armv6
 * Sum of squared byte differences between 16-byte-wide rows of r1 and r2.
 *
 * Each 32-bit word is split with uxtb16 into two registers of zero-extended
 * halfwords (bytes 0 and 2, and with ror #8 bytes 1 and 3). usub16 forms the
 * two differences per register and smlad of a register with itself adds
 * both squares to r0.
 *
 * r1 is read with ldrd and therefore has to meet the alignment requirement
 * of ldrd; r2 is read with single-word ldr. The loop continues while the
 * counter stays greater than zero (bgt).
 */
function ff_sse16_armv6, export=1
        ldr             r12, [sp]               /* r12 = row counter */
        push            {r4-r9, lr}
        mov             r0,  #0                 /* running sum */
1:
        ldrd            r4,  r5,  [r1]          /* r1 bytes 0-7 */
        ldr             r8,  [r2]               /* r2 bytes 0-3 */
        uxtb16          lr,  r4                 /* r1 bytes 0 and 2 */
        uxtb16          r4,  r4,  ror #8        /* r1 bytes 1 and 3 */
        uxtb16          r9,  r8                 /* r2 bytes 0 and 2 */
        uxtb16          r8,  r8,  ror #8        /* r2 bytes 1 and 3 */
        ldr             r7,  [r2, #4]           /* r2 bytes 4-7 */
        usub16          lr,  lr,  r9            /* differences for bytes 0 and 2 */
        usub16          r4,  r4,  r8            /* differences for bytes 1 and 3 */
        smlad           r0,  lr,  lr,  r0       /* sum += d0 * d0 + d2 * d2 */
        uxtb16          r6,  r5
        uxtb16          lr,  r5,  ror #8
        uxtb16          r8,  r7
        uxtb16          r9,  r7,  ror #8
        smlad           r0,  r4,  r4,  r0       /* sum += d1 * d1 + d3 * d3 */
        ldrd            r4,  r5,  [r1, #8]      /* r1 bytes 8-15 */
        usub16          r6,  r6,  r8
        usub16          r8,  lr,  r9
        ldr             r7,  [r2, #8]           /* r2 bytes 8-11 */
        smlad           r0,  r6,  r6,  r0
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r7
        uxtb16          r7,  r7,  ror #8
        smlad           r0,  r8,  r8,  r0
        ldr             r8,  [r2, #12]          /* r2 bytes 12-15 */
        usub16          lr,  lr,  r9
        usub16          r4,  r4,  r7
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          r5,  r5,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        smlad           r0,  r4,  r4,  r0
        usub16          r6,  r6,  r9
        usub16          r5,  r5,  r8
        smlad           r0,  r6,  r6,  r0
        add             r1,  r1,  r3            /* advance r1 to the next row */
        add             r2,  r2,  r3            /* advance r2 to the next row */
        subs            r12, r12, #1            /* flags for bgt; smlad below leaves N, Z, C, V alone */
        smlad           r0,  r5,  r5,  r0
        bgt             1b

        pop             {r4-r9, pc}
endfunc