/*
 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * vp6_edge_filter -- filter 12 pixels across one edge.
 *
 * In:
 *   r2        t, broadcast to every 16-bit lane
 *   q8        p[-2*s]   (d16 = pixels 0-7, low half of d17 = pixels 8-11)
 *   q9        p[-s]     (d18 / d19, same split)
 *   q10       p[ 0]     (d20 / d21)
 *   q11       p[ s]     (d22 / d23)
 *
 * Out:
 *   d18, d19  filtered p[-s]
 *   d20, d21  filtered p[ 0]
 *   Only bytes 0-11 of each row are filter results.  The upper four bytes
 *   of d19 and d21 are not filtered output; neither user of this macro in
 *   this file stores them.
 *
 * Per pixel:
 *   v = (3*(p[0] - p[-s]) + p[-2*s] - p[s] + 4) >> 3
 *   V = |v|,  s = v >> 15               (0 or -1)
 *   if (uint16_t)(V - t - 1) >= (uint16_t)(t - 1)
 *       keep v
 *   else
 *       v = ((2*t - V) + s) ^ s         (i.e. 2*t - V with the sign of v)
 *   p[-s] = sat_u8(p[-s] + v)
 *   p[ 0] = sat_u8(p[ 0] - v)
 *
 * Pixels 0-7 are handled in q registers and pixels 8-11 in d registers;
 * the two streams are interleaved instruction by instruction, and several
 * registers are reused once their previous value is dead (d16, d29,
 * d30/d31).  Do not reorder without re-checking every q/d alias.
 *
 * Clobbers: q0-q3, q8, q12-q15 (plus the q9/q10 outputs).
 * q11 is read only.  d8-d15 and the core registers are not touched.
 */
.macro  vp6_edge_filter
        vdup.16         q3,  r2                 @ t
        vmov.i16        q13, #1
        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
        vsubl.u8        q14, d21, d19           @ same two differences, pixels 8-15
        vsubl.u8        q15, d17, d23           @ (only d28 and d30 are used below)
        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
        vadd.i16        d29, d28, d28
        vadd.i16        q0,  q0,  q1            @   p[0]-p[-s] + p[-2*s]-p[s]
        vadd.i16        d28, d28, d30
        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
        vadd.i16        d28, d28, d29
        vrshr.s16       q0,  q0,  #3            @ v (rounding shift)
        vrshr.s16       d28, d28, #3
        vsub.i16        q8,  q3,  q13           @ t-1 (q8 input is dead by now)
        vabs.s16        q1,  q0                 @ V
        vshr.s16        q2,  q0,  #15           @ s
        vabs.s16        d30, d28
        vshr.s16        d29, d28, #15
        vsub.i16        q12, q1,  q3            @ V-t
        vsub.i16        d31, d30, d6
        vsub.i16        q12, q12, q13           @ V-t-1
        vsub.i16        d31, d31, d26
        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1
        vcge.u16        d31, d31, d16
        vadd.i16        q13, q3,  q3            @ 2*t
        vadd.i16        d16, d6,  d6            @ d16 (t-1) was consumed just above
        vsub.i16        q13, q13, q1            @ 2*t - V
        vsub.i16        d16, d16, d30
        vadd.i16        q13, q13, q2            @ += s
        vadd.i16        d16, d16, d29
        veor            q13, q13, q2            @ ^= s
        veor            d16, d16, d29
        vbif            q0,  q13, q12           @ where the compare failed, v = adjusted value
        vbif            d28, d16, d31
        vmovl.u8        q1,  d20                @ widen p[0]
        vmovl.u8        q15, d21
        vaddw.u8        q2,  q0,  d18           @ p[-s] + v
        vaddw.u8        q3,  q14, d19
        vsub.i16        q1,  q1,  q0            @ p[ 0] - v
        vsub.i16        d30, d30, d28
        vqmovun.s16     d18, q2                 @ saturate back to u8
        vqmovun.s16     d19, q3
        vqmovun.s16     d20, q1
        vqmovun.s16     d21, q15
.endm

/*
 * ff_vp6_edge_filter_ver_neon
 *
 * Filters 12 horizontally adjacent pixels across an edge between two rows.
 *
 * Register arguments, as used by the code:
 *   r0  pointer to the first pixel of row p[0]
 *   r1  row stride s in bytes
 *   r2  t
 *
 * Reads 16 bytes from each of the rows p[-2*s], p[-s], p[0] and p[s];
 * writes 12 bytes to each of the rows p[-s] and p[0].
 * Modifies r0 and r1 in addition to the NEON registers used by
 * vp6_edge_filter.
 */
function ff_vp6_edge_filter_ver_neon, export=1
        sub             r0,  r0,  r1,  lsl #1   @ back up two rows
        vld1.8          {q8},     [r0], r1      @ p[-2*s]
        vld1.8          {q9},     [r0], r1      @ p[-s]
        vld1.8          {q10},    [r0], r1      @ p[0]
        vld1.8          {q11},    [r0]          @ p[s]
        vp6_edge_filter
        sub             r0,  r0,  r1,  lsl #1   @ r0 is at row p[s]; return to row p[-s]
        sub             r1,  r1,  #8            @ stride minus the 8 bytes stepped by the writeback store
        vst1.8          {d18},    [r0]!         @ p[-s], pixels 0-7
        vst1.32         {d19[0]}, [r0], r1      @ p[-s], pixels 8-11; advance to row p[0]
        vst1.8          {d20},    [r0]!         @ p[0],  pixels 0-7
        vst1.32         {d21[0]}, [r0]          @ p[0],  pixels 8-11
        bx              lr
endfunc

/*
 * ff_vp6_edge_filter_hor_neon
 *
 * Filters 12 vertically adjacent pixels across an edge between two columns.
 *
 * Register arguments, as used by the code:
 *   r0  pointer to pixel p[0] of the first row
 *   r1  row stride in bytes
 *   r2  t
 *
 * Reads 4 bytes (p[-2] .. p[1]) from each of 12 consecutive rows and
 * writes 2 bytes (p[-1], p[0]) back to each of those rows.
 * Modifies r0 and r3 in addition to the NEON registers used by
 * vp6_edge_filter.
 */
function ff_vp6_edge_filter_hor_neon, export=1
        sub             r3,  r0,  #1            @ store pointer: p[-1] of row 0
        sub             r0,  r0,  #2            @ load pointer:  p[-2] of row 0
        vld1.32         {d16[0]}, [r0], r1      @ row 0
        vld1.32         {d18[0]}, [r0], r1      @ row 1
        vld1.32         {d20[0]}, [r0], r1      @ row 2
        vld1.32         {d22[0]}, [r0], r1      @ row 3
        vld1.32         {d16[1]}, [r0], r1      @ row 4
        vld1.32         {d18[1]}, [r0], r1      @ row 5
        vld1.32         {d20[1]}, [r0], r1      @ row 6
        vld1.32         {d22[1]}, [r0], r1      @ row 7
        vld1.32         {d17[0]}, [r0], r1      @ row 8
        vld1.32         {d19[0]}, [r0], r1      @ row 9
        vld1.32         {d21[0]}, [r0], r1      @ row 10
        vld1.32         {d23[0]}, [r0], r1      @ row 11
        @ Transpose: afterwards q8 = column p[-2], q9 = column p[-1],
        @ q10 = column p[0], q11 = column p[1], one byte per row.
        vtrn.8          q8,  q9
        vtrn.8          q10, q11
        vtrn.16         q8,  q10
        vtrn.16         q9,  q11
        vp6_edge_filter
        @ Interleave the two result columns into (p[-1], p[0]) byte pairs:
        @ even rows end up in q9, odd rows in q10.
        vtrn.8          q9,  q10
        vst1.16         {d18[0]}, [r3], r1      @ row 0
        vst1.16         {d20[0]}, [r3], r1      @ row 1
        vst1.16         {d18[1]}, [r3], r1      @ row 2
        vst1.16         {d20[1]}, [r3], r1      @ row 3
        vst1.16         {d18[2]}, [r3], r1      @ row 4
        vst1.16         {d20[2]}, [r3], r1      @ row 5
        vst1.16         {d18[3]}, [r3], r1      @ row 6
        vst1.16         {d20[3]}, [r3], r1      @ row 7
        vst1.16         {d19[0]}, [r3], r1      @ row 8
        vst1.16         {d21[0]}, [r3], r1      @ row 9
        vst1.16         {d19[1]}, [r3], r1      @ row 10
        vst1.16         {d21[1]}, [r3], r1      @ row 11
        bx              lr
endfunc