• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/arm/asm.S"
22
@ vp6_edge_filter: filter core expanded by both entry points in this file.
@
@ Filters 12 pixels.  Lanes 0-7 are carried in full q registers; lanes 8-11
@ are carried in single d registers, and the instruction that follows each
@ q-register step repeats that step for those four lanes.
@
@ In:   q8  (d16/d17) = p[-2*s]       q9  (d18/d19) = p[-s]
@       q10 (d20/d21) = p[0]          q11 (d22/d23) = p[s]
@       r2            = t, replicated into every 16-bit lane of q3
@ Out:  d18 and the low 4 bytes of d19 = p[-s] + v, saturated to u8
@       d20 and the low 4 bytes of d21 = p[0]  - v, saturated to u8
@       The high 4 bytes of d19/d21 are by-products, not filtered pixels.
@ Clobbers: q0-q3, q8, q12-q15 (q8 is reused as scratch, so p[-2*s] is lost).
@
@ Per 16-bit lane:
@   v = (3*(p[0]-p[-s]) + p[-2*s]-p[s] + 4) >> 3    (vrshr is a rounding shift)
@   V = |v|,  s = v >> 15                            (s is 0 or -1)
@   if the unsigned compare  V-t-1 >= t-1  is false:
@       v = ((2*t - V) + s) ^ s                      (2*t-V carrying the sign of v)
23.macro  vp6_edge_filter
24        vdup.16         q3,  r2                 @ t
25        vmov.i16        q13, #1
@ Form both differences while widening the u8 inputs to 16 bits.
@ q14/q15 take the high input halves; only d28 and d30 are kept from them.
26        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
27        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
28        vsubl.u8        q14, d21, d19
29        vsubl.u8        q15, d17, d23
@ From here on d29 (upper half of q14) is scratch for lanes 8-11.
30        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
31        vadd.i16        d29, d28, d28
32        vadd.i16        q0,  q0,  q1            @    p[0]-p[-s]  + p[-2*s]-p[s]
33        vadd.i16        d28, d28, d30
34        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
35        vadd.i16        d28, d28, d29
36        vrshr.s16       q0,  q0,  #3            @ v
37        vrshr.s16       d28, d28, #3
@ q8 stops being p[-2*s] here: it becomes t-1, and d16 supplies t-1 to the
@ d-register compare below before being reused as scratch.
38        vsub.i16        q8,  q3,  q13           @ t-1
39        vabs.s16        q1,  q0                 @ V
40        vshr.s16        q2,  q0,  #15           @ s
41        vabs.s16        d30, d28
42        vshr.s16        d29, d28, #15
43        vsub.i16        q12, q1,  q3            @ V-t
44        vsub.i16        d31, d30, d6
45        vsub.i16        q12, q12, q13           @ V-t-1
46        vsub.i16        d31, d31, d26
@ Unsigned compare: each lane becomes all-ones where v is to be kept as is.
47        vcge.u16        q12, q12, q8            @ V-t-1 >= t-1
48        vcge.u16        d31, d31, d16
@ Replacement value for the lanes whose mask is zero.
49        vadd.i16        q13, q3,  q3            @ 2*t
50        vadd.i16        d16, d6,  d6
51        vsub.i16        q13, q13, q1            @ 2*t - V
52        vsub.i16        d16, d16, d30
53        vadd.i16        q13, q13, q2            @ += s
54        vadd.i16        d16, d16, d29
55        veor            q13, q13, q2            @ ^= s
56        veor            d16, d16, d29
@ vbif inserts the replacement only where the mask bits are 0.
57        vbif            q0,  q13, q12
58        vbif            d28, d16, d31
@ Apply the adjustment: q2/q3 = p[-s] + v, q1/d30 = p[0] - v (16-bit lanes).
@ q3 (t) is overwritten here; t is no longer needed.
59        vmovl.u8        q1,  d20
60        vmovl.u8        q15, d21
61        vaddw.u8        q2,  q0,  d18
62        vaddw.u8        q3,  q14, d19
63        vsub.i16        q1,  q1,  q0
64        vsub.i16        d30, d30, d28
@ Narrow back to unsigned bytes with saturation.
65        vqmovun.s16     d18, q2
66        vqmovun.s16     d19, q3
67        vqmovun.s16     d20, q1
68        vqmovun.s16     d21, q15
69.endm
70
@ ff_vp6_edge_filter_ver_neon: edge filter whose four taps per pixel lie
@ r1 bytes apart (p[-2*s], p[-s], p[0], p[s] with s = r1).
@
@ In:   r0 = address of p[0] for the first pixel
@       r1 = s, byte distance between taps (presumably the line stride)
@       r2 = t, consumed by the vp6_edge_filter macro
@ Reads 16 bytes at each of r0-2*r1, r0-r1, r0 and r0+r1.
@ Writes 12 bytes at r0-r1 and 12 bytes at r0.
@ Modifies r0, r1, q0-q3 and q8-q15.
@ NOTE(review): the C prototype is not visible in this file; the argument
@ roles above are read off the register usage - confirm against the caller.
71function ff_vp6_edge_filter_ver_neon, export=1
@ Step back two rows so the loads start at p[-2*s].
72        sub             r0,  r0,  r1,  lsl #1
73        vld1.8          {q8},     [r0], r1      @ p[-2*s]
74        vld1.8          {q9},     [r0], r1      @ p[-s]
75        vld1.8          {q10},    [r0], r1      @ p[0]
76        vld1.8          {q11},    [r0]          @ p[s]
77        vp6_edge_filter
@ r0 is at p[s] after the loads; move it back to p[-s] for the write-back.
78        sub             r0,  r0,  r1,  lsl #1
@ The 8-byte store below advances r0 by 8, so post-incrementing by s-8 after
@ the 4-byte store lands exactly on p[0].
79        sub             r1,  r1,  #8
@ Each row gets 8 bytes followed by 4 bytes: 12 filtered pixels.
80        vst1.8          {d18},    [r0]!
81        vst1.32         {d19[0]}, [r0], r1
82        vst1.8          {d20},    [r0]!
83        vst1.32         {d21[0]}, [r0]
84        bx              lr
85endfunc
86
@ ff_vp6_edge_filter_hor_neon: edge filter whose four taps per pixel are
@ adjacent bytes within a row (p[-2], p[-1], p[0], p[1]), applied to 12 rows.
@
@ In:   r0 = address of p[0] in the first row
@       r1 = byte distance between consecutive rows
@       r2 = t, consumed by the vp6_edge_filter macro
@ Reads 4 bytes at r0-2 in each of 12 rows.
@ Writes 2 bytes (p[-1], p[0]) at r0-1 in each of those 12 rows.
@ Modifies r0, r3, q0-q3 and q8-q15.
@ NOTE(review): the C prototype is not visible in this file; the argument
@ roles above are read off the register usage - confirm against the caller.
87function ff_vp6_edge_filter_hor_neon, export=1
@ r3 = store pointer (p[-1] of the first row), r0 = load pointer (p[-2]).
88        sub             r3,  r0,  #1
89        sub             r0,  r0,  #2
@ One 32-bit load per row.  Rows 0-3 fill lane 0 of d16/d18/d20/d22,
@ rows 4-7 fill lane 1, rows 8-11 fill lane 0 of d17/d19/d21/d23.
@ Lane 1 of d17/d19/d21/d23 is never loaded; its results are not stored.
90        vld1.32         {d16[0]}, [r0], r1
91        vld1.32         {d18[0]}, [r0], r1
92        vld1.32         {d20[0]}, [r0], r1
93        vld1.32         {d22[0]}, [r0], r1
94        vld1.32         {d16[1]}, [r0], r1
95        vld1.32         {d18[1]}, [r0], r1
96        vld1.32         {d20[1]}, [r0], r1
97        vld1.32         {d22[1]}, [r0], r1
98        vld1.32         {d17[0]}, [r0], r1
99        vld1.32         {d19[0]}, [r0], r1
100        vld1.32         {d21[0]}, [r0], r1
101        vld1.32         {d23[0]}, [r0], r1
@ 4x4 byte transpose inside every 32-bit lane.  Afterwards byte i of
@ q8/q9/q10/q11 is p[-2]/p[-1]/p[0]/p[1] of row i, the layout the macro expects.
102        vtrn.8          q8,  q9
103        vtrn.8          q10, q11
104        vtrn.16         q8,  q10
105        vtrn.16         q9,  q11
106        vp6_edge_filter
@ Interleave the filtered p[-1] (q9) and p[0] (q10) bytes so each 16-bit lane
@ is one row's output pair: q9 holds the even rows, q10 the odd rows.
107        vtrn.8          q9,  q10
@ Store one 16-bit pair per row, alternating q9/q10 lanes to keep row order.
108        vst1.16         {d18[0]}, [r3], r1
109        vst1.16         {d20[0]}, [r3], r1
110        vst1.16         {d18[1]}, [r3], r1
111        vst1.16         {d20[1]}, [r3], r1
112        vst1.16         {d18[2]}, [r3], r1
113        vst1.16         {d20[2]}, [r3], r1
114        vst1.16         {d18[3]}, [r3], r1
115        vst1.16         {d20[3]}, [r3], r1
116        vst1.16         {d19[0]}, [r3], r1
117        vst1.16         {d21[0]}, [r3], r1
118        vst1.16         {d19[1]}, [r3], r1
119        vst1.16         {d21[1]}, [r3], r1
120        bx              lr
121endfunc
122