• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/aarch64/asm.S"
22#include "asm-offsets.h"
23
// resample_one fmt, es
//
// Emits the exported function ff_resample_one_<fmt>_neon. The function
// multiplies filter_length source elements with filter_length filter
// coefficients, accumulates the products and stores one output element.
//
//   fmt  format suffix used in the function name. "dbl" is special only in
//        that it has to bring its own M_MUL2/M_MLA2; every other format gets
//        the empty stubs defined right below.
//   es   shift amount that scales an element index to a byte offset
//        (log2 of the element size); defaults to 2.
//
// LOAD1, LOAD2, LOAD4, LOAD8, M_MLA, M_MUL and STORE_ONE (plus M_MLA2 and
// M_MUL2 for dbl) must be defined before this macro is invoked. All of them
// are purged at the end so that the next format can define its own set.
//
// Register use as visible in the body:
//   x0       base address from which FILTER_BANK, FILTER_LENGTH, PHASE_SHIFT
//            and the word following PHASE_SHIFT are loaded
//   x1, x2   destination base and destination index (w2 is sign-extended)
//   x3       source base, advanced to src[sample_index]
//   x4       index; index >> phase_shift is the sample index and
//            index & phase_mask selects the filter
//   w6       remaining filter_length
//   x9       filter pointer
//   v0, v1   accumulators; v2-v7 and v16-v27 hold loaded data
24.macro resample_one     fmt, es=2
// Formats other than dbl do not use the separate second-accumulator macros,
// so they are stubbed out to expand to nothing.
25.ifnc \fmt, dbl
26    .macro  M_MUL2      x:vararg
27    .endm
28    .macro  M_MLA2      x:vararg
29    .endm
30.endif
31function ff_resample_one_\fmt\()_neon, export=1
32        sxtw            x2,  w2
33        ldr             x9,  [x0, #FILTER_BANK]
34        ldr             w6,  [x0, #FILTER_LENGTH]
35        ldp             w7,  w8,  [x0, #PHASE_SHIFT]    // and phase_mask
        // NOTE(review): the shift below reads all 64 bits of x4. If the C
        // prototype passes the index as a 32-bit value, the upper half is
        // not guaranteed to be zero under AAPCS64 - confirm against caller.
36        lsr             x10, x4,  x7                    // sample_index
37        and             x4,  x4,  x8
38        lsl             x11, x6,  #\es          // filter_length * elem_size
39        add             x3,  x3,  x10, lsl #\es // src[sample_index]
40        madd            x9,  x11, x4,  x9       // filter
        // Fewer than 16 elements: skip the unrolled loop and go to the
        // tail code, which starts from zeroed accumulators.
41        cmp             w6,  #16
42        b.lt            5f
438:      // remaining filter_length at least 16
        // The flags set by this subs are consumed by the b.eq further down;
        // the LOAD*/M_* macros in between expand only to loads and vector
        // arithmetic, so the flags survive.
44        subs            w6,  w6,  #16
45        LOAD8           v4,  v5,  v6,  v7,  x3
46        LOAD8           v16, v17, v18, v19, x9
        // First products initialise the accumulators (multiply, not
        // multiply-accumulate), so no explicit zeroing is needed here.
47        M_MUL           v0,  v4,  v16, v1
48        M_MUL2          v1,  v6,  v18
        // Loop body: the next 8 source/filter elements are loaded while the
        // products of the previous 8 are still being accumulated.
497:
50        LOAD8           v20, v21, v22, v23, x3
51        M_MLA           v0,  v5,  v17, v1
52        M_MLA2          v1,  v7,  v19
53        LOAD8           v24, v25, v26, v27, x9
54        M_MLA           v0,  v20, v24, v1
55        M_MLA2          v1,  v22, v26
        // Remaining length hit exactly zero: finish the pending products
        // at 6: and store.
56        b.eq            6f
57        cmp             w6,  #16
58        M_MLA           v0,  v21, v25, v1
59        M_MLA2          v1,  v23, v27
        // 1-15 elements left: continue in the tail code with the current
        // accumulator contents.
60        b.lt            4f
61        subs            w6,  w6,  #16
62        LOAD8           v4,  v5,  v6,  v7,  x3
63        LOAD8           v16, v17, v18, v19, x9
64        M_MLA           v0,  v4,  v16, v1
65        M_MLA2          v1,  v6,  v18
66        b               7b
        // Exit of the unrolled loop when nothing remains: accumulate the
        // last loaded registers, reduce and store dst[x2].
676:
68        M_MLA           v0,  v21, v25,  v1
69        M_MLA2          v1,  v23, v27
70        STORE_ONE       0,   x1,  x2,   v1
71        ret
        // Entry for filter_length below 16: no M_MUL has run, so both
        // accumulators are cleared explicitly.
725:
73        movi            v0.16b, #0
74        movi            v1.16b, #0
754:      // remaining filter_length 1-15
        // Four elements per pass until fewer than four remain.
76        cmp             w6,  #4
77        b.lt            2f
78        subs            w6,  w6,  #4
79        LOAD4           v4,  v5,  x3
80        LOAD4           v6,  v7,  x9
81        M_MLA           v0,  v4,  v6,  v1
82        M_MLA2          v1,  v5,  v7
83        b.eq            0f
84        b               4b
852:      // remaining filter_length 1-3
86        cmp             w6,  #2
87        b.lt            1f
        // LOAD2 and LOAD1 take a bare register number, not a register name.
88        LOAD2           2,   x3
89        LOAD2           3,   x9
90        subs            w6,  w6,  #2
91        M_MLA           v0,  v2,  v3
92        b.eq            0f
931:      // remaining filter_length 1
        // NOTE(review): this path is also reached when w6 is 0 on entry
        // (0 < 16, 0 < 4, 0 < 2), in which case one element is still read
        // and accumulated - presumably filter_length is never 0; confirm.
94        LOAD1           6,   x3
95        LOAD1           7,   x9
96        M_MLA           v0,  v6,  v7
        // Common exit of the tail code: reduce the accumulators and store.
970:
98        STORE_ONE       0,   x1,  x2,  v1
99        ret
100endfunc
101
// Drop the per-format helper macros so the next format can redefine them.
102.purgem LOAD1
103.purgem LOAD2
104.purgem LOAD4
105.purgem LOAD8
106.purgem M_MLA
107.purgem M_MLA2
108.purgem M_MUL
109.purgem M_MUL2
110.purgem STORE_ONE
111.endm
112
113
// Helper macros for the double precision variant (8-byte elements, two
// elements per 128-bit vector register), followed by the instantiation.

// Load one double into d<d1> and post-increment addr by 8.
// d1 is a bare register number.
114.macro  LOAD1           d1, addr
115        ldr             d\d1, [\addr], #8
116.endm
// Load two doubles into v<d1> and post-increment addr by 16.
// d1 is a bare register number.
117.macro  LOAD2           d1, addr
118        ld1             {v\d1\().2d}, [\addr], #16
119.endm
// Load four doubles into the two named vector registers, post-increment
// addr by 32. d1 and d2 are full register names (for example v4).
120.macro  LOAD4           d1, d2, addr
121        ld1             {\d1\().2d,\d2\().2d}, [\addr], #32
122.endm
// Load eight doubles into the four named vector registers, post-increment
// addr by 64.
123.macro  LOAD8           d1, d2, d3, d4, addr
124        ld1             {\d1\().2d,\d2\().2d,\d3\().2d,\d4\().2d}, [\addr], #64
125.endm
// d += r0 * r1 on two double lanes. The trailing d2 argument is accepted
// but not used.
126.macro  M_MLA           d, r0, r1, d2:vararg
127        fmla            \d\().2d, \r0\().2d, \r1\().2d
128.endm
// Multiply-accumulate into the second accumulator; forwards all arguments
// unchanged to M_MLA.
129.macro  M_MLA2          second:vararg
130        M_MLA           \second
131.endm
// d = r0 * r1 on two double lanes. The trailing d2 argument is accepted
// but not used.
132.macro  M_MUL           d, r0, r1, d2:vararg
133        fmul            \d\().2d, \r0\().2d, \r1\().2d
134.endm
// Multiply into the second accumulator; forwards all arguments unchanged
// to M_MUL.
135.macro  M_MUL2          second:vararg
136        M_MUL           \second
137.endm
// Add accumulator d2 into v<rn>, sum the two lanes into d<rn> and store the
// result at addr + idx * 8.
138.macro  STORE_ONE       rn, addr, idx, d2
139        fadd            v\rn\().2d,  v\rn\().2d,  \d2\().2d
140        faddp           d\rn\(),  v\rn\().2d
141        str             d\rn\(),  [\addr, \idx, lsl #3]
142.endm
143
// es = 3: element offsets are scaled by 8 bytes.
144resample_one dbl, 3
145
146
// Helper macros for the single precision float variant (4-byte elements,
// four elements per 128-bit vector register), followed by the
// instantiation. Only one accumulator is used: the M_MUL2/M_MLA2 stubs
// that resample_one defines for this format expand to nothing.

// Load one float into s<d1> and post-increment addr by 4.
// d1 is a bare register number.
147.macro  LOAD1           d1, addr
148        ldr             s\d1, [\addr], #4
149.endm
// Load two floats into the low half of v<d1> and post-increment addr by 8.
// d1 is a bare register number.
150.macro  LOAD2           d1, addr
151        ld1             {v\d1\().2s}, [\addr], #8
152.endm
// Load four floats into d1 and post-increment addr by 16.
// d2 is accepted for call compatibility but not used.
153.macro  LOAD4           d1, d2, addr
154        ld1             {\d1\().4s}, [\addr], #16
155.endm
// Load eight floats into d1 and d2 and post-increment addr by 32.
// d3 and d4 are accepted for call compatibility but not used.
156.macro  LOAD8           d1, d2, d3, d4, addr
157        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
158.endm
// d += r0 * r1 on four float lanes. The trailing d2 argument is not used.
159.macro  M_MLA           d, r0, r1, d2:vararg
160        fmla            \d\().4s, \r0\().4s, \r1\().4s
161.endm
// d = r0 * r1 on four float lanes. The trailing d2 argument is not used.
162.macro  M_MUL           d, r0, r1, d2:vararg
163        fmul            \d\().4s, \r0\().4s, \r1\().4s
164.endm
// Sum the four lanes of v<rn> with two pairwise adds and store the scalar
// result at addr + idx * 4. d2 is not used.
165.macro  STORE_ONE       rn, addr, idx, d2
166        faddp           v\rn\().4s,  v\rn\().4s,  v\rn\().4s
167        faddp           s\rn\(),  v\rn\().2s
168        str             s\rn\(),  [\addr, \idx, lsl #2]
169.endm
170
// Uses the default es = 2: element offsets are scaled by 4 bytes.
171resample_one flt
172
173
// Helper macros for the signed 16-bit variant (2-byte elements), followed
// by the instantiation. Products are widened to 32 bits and accumulated in
// four 32-bit lanes of a single accumulator; the M_MUL2/M_MLA2 stubs that
// resample_one defines for this format expand to nothing.

// Load one 16-bit element into h<d1> and post-increment addr by 2.
// d1 is a bare register number.
174.macro  LOAD1           d1, addr
175        ldr             h\d1, [\addr], #2
176.endm
// Load two 16-bit elements (4 bytes) into s<d1> and post-increment addr
// by 4. d1 is a bare register number.
177.macro  LOAD2           d1, addr
178        ldr             s\d1, [\addr], #4
179.endm
// Load four 16-bit elements into d1 and post-increment addr by 8.
// d2 is accepted for call compatibility but not used.
180.macro  LOAD4           d1, d2, addr
181        ld1             {\d1\().4h}, [\addr], #8
182.endm
// Load eight 16-bit elements into d1 and d2 (four each) and post-increment
// addr by 16. d3 and d4 are accepted but not used.
183.macro  LOAD8           d1, d2, d3, d4, addr
184        ld1             {\d1\().4h,\d2\().4h}, [\addr], #16
185.endm
// d += r0 * r1, signed widening multiply-accumulate from four 16-bit lanes
// to four 32-bit lanes. The trailing d2 argument is not used.
186.macro  M_MLA           d, r0, r1, d2:vararg
187        smlal           \d\().4s, \r0\().4h, \r1\().4h
188.endm
// d = r0 * r1, signed widening multiply from four 16-bit lanes to four
// 32-bit lanes. The trailing d2 argument is not used.
189.macro  M_MUL           d, r0, r1, d2:vararg
190        smull           \d\().4s, \r0\().4h, \r1\().4h
191.endm
// Sum the four 32-bit lanes of v<rn> with two pairwise adds, narrow to
// 16 bits with a rounding, saturating right shift by 15, and store the low
// halfword at addr + idx * 2. d2 is not used.
192.macro  STORE_ONE       rn, addr, idx, d2
193        addp            v\rn\().4s,  v\rn\().4s,  v\rn\().4s
194        addp            v\rn\().4s,  v\rn\().4s,  v\rn\().4s
195        sqrshrn         v\rn\().4h,  v\rn\().4s,  #15
196        str             h\rn\(),  [\addr, \idx, lsl #1]
197.endm
198
// es = 1: element offsets are scaled by 2 bytes.
199resample_one s16, 1
200
201
// Helper macros for the signed 32-bit variant (4-byte elements), followed
// by the instantiation. Products are widened to 64 bits; the low and high
// halves of a 4-lane input go to two separate accumulators, both handled
// inside M_MLA/M_MUL (the M_MUL2/M_MLA2 stubs that resample_one defines for
// this format expand to nothing).

// Load one 32-bit element into s<d1> and post-increment addr by 4.
// d1 is a bare register number.
202.macro  LOAD1           d1, addr
203        ldr             s\d1, [\addr], #4
204.endm
// Load two 32-bit elements into the low half of v<d1> and post-increment
// addr by 8. d1 is a bare register number.
205.macro  LOAD2           d1, addr
206        ld1             {v\d1\().2s}, [\addr], #8
207.endm
// Load four 32-bit elements into d1 and post-increment addr by 16.
// d2 is accepted for call compatibility but not used.
208.macro  LOAD4           d1, d2, addr
209        ld1             {\d1\().4s}, [\addr], #16
210.endm
// Load eight 32-bit elements into d1 and d2 and post-increment addr by 32.
// d3 and d4 are accepted for call compatibility but not used.
211.macro  LOAD8           d1, d2, d3, d4, addr
212        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
213.endm
// d1 += low two lanes of r0 * r1, widened to 64 bits. When a second
// accumulator d2 is passed, the high two lanes are accumulated into it as
// well; without d2 only the low two lanes are processed.
214.macro  M_MLA           d1, r0, r1, d2:vararg
215        smlal           \d1\().2d, \r0\().2s, \r1\().2s
216.ifnb \d2
217        smlal2          \d2\().2d, \r0\().4s, \r1\().4s
218.endif
219.endm
// d1 = low two lanes of r0 * r1, widened to 64 bits. When d2 is passed it
// receives the products of the high two lanes.
220.macro  M_MUL           d1, r0, r1, d2:vararg
221        smull           \d1\().2d, \r0\().2s, \r1\().2s
222.ifnb \d2
223        smull2          \d2\().2d, \r0\().4s, \r1\().4s
224.endif
225.endm
// Add accumulator d2 into v<rn>, sum the two 64-bit lanes into d<rn>,
// narrow to 32 bits with a rounding, saturating right shift by 30, and
// store the result at addr + idx * 4.
226.macro  STORE_ONE       rn, addr, idx, d2
227        add             v\rn\().2d,  v\rn\().2d,  \d2\().2d
228        addp            d\rn\(),     v\rn\().2d
229        sqrshrn         v\rn\().2s,  v\rn\().2d,  #30
230        str             s\rn\(),  [\addr, \idx, lsl #2]
231.endm
232
// Uses the default es = 2: element offsets are scaled by 4 bytes.
233resample_one s32
234