/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * The coupling kernel for one vector of four 32-bit lanes is split into two
 * halves so that the main loop can issue the stores of the previous vector
 * between them.
 *
 * Naming used below: "a" is data read from the r0 stream, "b" is data read
 * from the r1 stream.
 */

/*
 * First half: classify and sign-adjust b.
 *   \vb  in:  four lanes loaded from the r1 stream
 *        out: the same lanes with the sign bit of the matching \va lane
 *             XORed in
 *   \va  four lanes loaded from the r0 stream (not modified)
 * Requires q10 = 0x80000000 in every lane.
 * Writes   q8  = all-ones in lanes where the incoming \vb, compared as a
 *                signed 32-bit integer, is <= 0 (sign bit set, or +0.0)
 *          q9  = scratch (sign bits of \va)
 */
.macro  vorbis_couple_sign vb, va
        vcle.s32        q8,  \vb, #0
        vand            q9,  \va, q10
        veor            \vb, \vb, q9
.endm

/*
 * Second half: produce the two output vectors.
 *   \vb  in:  sign-adjusted b from vorbis_couple_sign
 *        out: a + (b where the q8 lane is set, else 0)  -> goes to r0 stream
 *   \va  in:  a
 *        out: a - (b where the q8 lane is clear, else 0) -> goes to r1 stream
 * Requires q8 as left by vorbis_couple_sign.
 * Writes   q2, q3 (scratch).
 */
.macro  vorbis_couple_mix vb, va
        vand            q2,  \vb, q8
        vbic            q3,  \vb, q8
        vadd.f32        \vb, \va, q2
        vsub.f32        \va, \va, q3
.endm

/*
 * ff_vorbis_inverse_coupling_neon
 *
 * In-place transform of two float arrays, four elements per step.
 *
 * In:   r0 = first array,  read and written with a 128-bit alignment
 *            qualifier, so it must be 16-byte aligned
 *       r1 = second array, same alignment requirement
 *       r2 = element count
 *       NOTE(review): in Vorbis terms r0/r1 are presumably the magnitude and
 *       angle vectors; the C prototype is not visible in this file - confirm
 *       against the caller.
 *       NOTE(review): the count is only ever reduced by 4 or 8 and there is
 *       no check for values below 4, so it is assumed to be a positive
 *       multiple of 4 - confirm against the caller.
 * Out:  none in registers; both arrays are updated in place.
 * Uses: r0-r3, r12, q0-q3, q8-q12, flags.
 *
 * Per element, with a = [r0], b = [r1], s = b XOR signbit(a):
 *       if b <= 0 (as int32):  [r0] = a + s    [r1] = a - 0
 *       else:                  [r0] = a + 0    [r1] = a - s
 *
 * The main loop is software pipelined: while vector N is being computed the
 * results of vector N-1 are stored through the trailing cursors r3/r12.
 */
function ff_vorbis_inverse_coupling_neon, export=1
        vmov.i32        q10, #1<<31             @ sign-bit mask in every lane
        subs            r2,  r2,  #4            @ Z set when exactly one vector
        mov             r3,  r0                 @ store cursor, r0 stream
        mov             r12, r1                 @ store cursor, r1 stream
        beq             3f                      @ flags still from subs above

        @ Prologue: compute the first vector, its stores happen at 1: below.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        vorbis_couple_sign q12, q11
        vorbis_couple_mix  q12, q11

        @ First half of the loop body: compute into q1/q0, store q12/q11.
1:      vld1.32         {d2-d3},  [r1,:128]!
        vld1.32         {d0-d1},  [r0,:128]!
        vorbis_couple_sign q1,  q0
        vst1.32         {d24-d25},[r3, :128]!
        vst1.32         {d22-d23},[r12,:128]!
        vorbis_couple_mix  q1,  q0
        subs            r2,  r2,  #8            @ two vectors per full iteration
        ble             2f                      @ at most one vector is left

        @ Second half of the loop body: compute into q12/q11, store q1/q0.
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        vorbis_couple_sign q12, q11
        vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        vorbis_couple_mix  q12, q11
        b               1b

        @ Drain: store the pending q1/q0 results. The stores leave the flags
        @ from the last subs intact: lt means nothing is left, eq means one
        @ more vector remains and is handled by falling through to 3:.
2:      vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        it              lt
        bxlt            lr

        @ Single trailing vector, loaded and stored through r0/r1 directly.
3:      vld1.32         {d2-d3},  [r1,:128]
        vld1.32         {d0-d1},  [r0,:128]
        vorbis_couple_sign q1,  q0
        vorbis_couple_mix  q1,  q0
        vst1.32         {d2-d3},  [r0,:128]!
        vst1.32         {d0-d1},  [r1,:128]!
        bx              lr
endfunc