/*
 * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#include "asm-offsets.h"

/*
 * resample_one fmt, es -- emit ff_resample_one_\fmt\()_neon, export=1.
 *
 * Computes one output sample as the dot product of one polyphase filter
 * with a window of the source signal.
 *
 * Register contract (AAPCS):
 *   r0       = ResampleContext pointer; fields read via asm-offsets.h
 *              (PHASE_SHIFT, PHASE_SHIFT+4 = phase_mask, FILTER_LENGTH,
 *              FILTER_BANK)
 *   r1, r2   = dst base and dst element index; output goes to
 *              dst[dst_index]
 *   r3       = src base
 *   [sp, #8] = index (5th arg; offset accounts for the two pushed regs)
 *
 * \es is log2 of the element size in bytes (2 for 32-bit, 1 for 16-bit).
 *
 * The body relies on per-format helper macros (LOAD1/2/4, MLA1/2/4,
 * MUL4, INIT4, STORE) that must be defined before each instantiation;
 * they are purged at the end so the next format can redefine them.
 * Clobbers r0-r5, ip and NEON registers used by the helpers.
 */
.macro resample_one     fmt, es=2
function ff_resample_one_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es    /* r1 = &dst[dst_index] */

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index */
        mul             r2, r2, r5              /* filter offset in elements */

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */

        blt             5f                      /* short filter: scalar-ish tail path */
8:
        /* main loop, software-pipelined: 8 taps per iteration */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        /* filter length was an exact multiple of 8 */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4                                   /* zero accumulators; no MUL4 ran */
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm

/*
 * float32 helpers for resample_one.
 * d0/d1 (q0) hold filter taps, d4/d5 (q2) hold source samples;
 * q8 (d16/d17) accumulates partial sums, reduced by STORE.
 */
.macro  LOAD1
        veor.32         d0, d0                  /* clear so the unused lane is 0 */
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmla.f32        d16, d0, d4[0]
.endm
.macro  MLA2
        vmla.f32        d16, d0, d4
.endm
.macro  MLA4
        vmla.f32        d16, d0, d4
        vmla.f32        d17, d1, d5
.endm
.macro  MUL4
        vmul.f32        d16, d0, d4
        vmul.f32        d17, d1, d5
.endm
.macro  INIT4
        veor.f32        q8, q8                  /* zero the accumulator */
.endm
.macro  STORE
        /* horizontal add of the 4 partial sums, then scalar store */
        vpadd.f32       d16, d16, d17
        vpadd.f32       d16, d16, d16
        vst1.32         d16[0], [r1]
.endm

resample_one flt, 2

/*
 * s32 helpers for resample_one.
 * Widening multiply-accumulate: 32x32 -> 64-bit products in q8/q9;
 * STORE sums them and narrows back to s32 with rounding (Q30 filter
 * coefficients, hence the #30 shift).
 */
.macro  LOAD1
        veor.32         d0, d0                  /* clear so the unused lane is 0 */
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmlal.s32       q8, d0, d4[0]
.endm
.macro  MLA2
        vmlal.s32       q8, d0, d4
.endm
.macro  MLA4
        vmlal.s32       q8, d0, d4
        vmlal.s32       q9, d1, d5
.endm
.macro  MUL4
        vmull.s32       q8, d0, d4
        vmull.s32       q9, d1, d5
.endm
.macro  INIT4
        veor.s64        q8, q8                  /* zero both 64-bit accumulators */
        veor.s64        q9, q9
.endm
.macro  STORE
        /* fold the four 64-bit partial sums into one, round-narrow to s32 */
        vadd.s64        q8, q8, q9
        vadd.s64        d16, d16, d17
        vqrshrn.s64     d16, q8, #30
        vst1.32         d16[0], [r1]
.endm

resample_one s32, 2

/*
 * s16 helpers for resample_one (element size 2 bytes, so es=1).
 * Widening multiply-accumulate: 16x16 -> 32-bit products in q8;
 * STORE reduces and round-narrows to s16 (Q15 coefficients, shift #15).
 * LOAD2 clears the destination d-regs first so the unused upper lanes
 * contribute 0 to the accumulation.
 */
.macro  LOAD1
        veor.16         d0, d0
        vld1.16         {d0[0]}, [r0]! /* load filter */
        vld1.16         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        veor.16         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        veor.16         d4, d4
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.16         {d0}, [r0]! /* load filter */
        vld1.16         {d4}, [r3]! /* load src */
.endm
.macro  MLA1
        vmlal.s16       q8, d0, d4[0]
.endm
.macro  MLA2
        vmlal.s16       q8, d0, d4
.endm
.macro  MLA4
        vmlal.s16       q8, d0, d4
.endm
.macro  MUL4
        vmull.s16       q8, d0, d4
.endm
.macro  INIT4
        veor.s32        q8, q8                  /* zero the 32-bit accumulator */
.endm
.macro  STORE
        /* horizontal add of partial sums, round-narrow to s16, store */
        vpadd.s32       d16, d16, d17
        vpadd.s32       d16, d16, d16
        vqrshrn.s32     d16, q8, #15
        vst1.16         d16[0], [r1]
.endm

resample_one s16, 1

/*
 * resample_linear fmt, es -- emit ff_resample_linear_\fmt\()_neon, export=1.
 *
 * Like resample_one, but accumulates the dot product with TWO adjacent
 * phase filters (r0 = filter, r2 = filter + filter_length) so STORE can
 * linearly interpolate between the two results using `frac`.
 *
 * Register contract (AAPCS):
 *   r0        = ResampleContext pointer (PHASE_SHIFT, phase_mask at
 *               PHASE_SHIFT+4, FILTER_LENGTH, FILTER_BANK, SRC_INCR)
 *   r1, r2    = dst base and dst element index
 *   r3        = src base
 *   [sp, #8]  = index; [sp, #12] = frac (read by the per-format STORE)
 *   r4        = c->src_incr, consumed by STORE for the interpolation
 *
 * \es is log2 of the element size in bytes.  Same helper-macro protocol
 * as resample_one; helpers are purged at the end.  Clobbers r0-r5, ip
 * and the NEON registers used by the helpers.
 */
.macro resample_linear  fmt, es=2
function ff_resample_linear_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es    /* r1 = &dst[dst_index] */

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index */
        mul             r2, r2, r5              /* filter offset in elements */

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8
        ldr             r4, [r0, #SRC_INCR]
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */
        add             r2, r0, r5, lsl #\es /* filter[... + c->filter_length] */

        blt             5f                      /* short filter: tail path */
8:
        /* main loop, software-pipelined: 8 taps per iteration */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:
        LOAD4
        beq             6f
        cmp             r5, #8
        MLA4
        blt             4f
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:
        /* filter length was an exact multiple of 8 */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4                                   /* zero accumulators; no MUL4 ran */
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm

/*
 * float32 helpers for resample_linear.
 * Two filter streams: q0 from r0 (current phase) accumulates into q9,
 * q1 from r2 (next phase) into q8; q2 holds the shared source samples.
 * STORE computes val + (v2 - val) * frac / src_incr and writes one
 * float, with frac taken from [sp, #12] and src_incr from r4.
 */
.macro  LOAD1
        veor.32         d0, d0                  /* clear unused lanes */
        veor.32         d2, d2
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d2[0]}, [r2]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d2}, [r2]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d2,d3}, [r2]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
.macro  MLA1
        vmla.f32        d18, d0, d4[0]
        vmla.f32        d16, d2, d4[0]
.endm
.macro  MLA2
        vmla.f32        d18, d0, d4
        vmla.f32        d16, d2, d4
.endm
.macro  MLA4
        vmla.f32        q9, q0, q2
        vmla.f32        q8, q1, q2
.endm
.macro  MUL4
        vmul.f32        q9, q0, q2
        vmul.f32        q8, q1, q2
.endm
.macro  INIT4
        veor.f32        q9, q9                  /* zero both accumulators */
        veor.f32        q8, q8
.endm
.macro  STORE
        vldr            s0, [sp, #12] /* frac */
        vmov            s1, r4                  /* s1 = src_incr (integer bits) */
        vcvt.f32.s32    d0, d0                  /* convert frac/src_incr to float */

        vsub.f32        q8, q8, q9 /* v2 - val */
        vpadd.f32       d18, d18, d19           /* reduce val  -> s4 */
        vpadd.f32       d16, d16, d17           /* reduce diff -> s2 */
        vpadd.f32       d2, d18, d18
        vpadd.f32       d1, d16, d16

        vmul.f32        s2, s2, s0 /* (v2 - val) * frac */
        vdiv.f32        s2, s2, s1 /* / c->src_incr */
        vadd.f32        s4, s4, s2

        vstr            s4, [r1]
.endm

resample_linear flt, 2
