• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "libavutil/aarch64/asm.S"
20
21           // 0.85..^1    0.85..^2    0.85..^3    0.85..^4
// tab_st: IEEE-754 single-precision bit patterns approximating
// c^1, c^2, c^3, c^4 with c ~= 0.85 (0x3f599a00 ~= 0.850006).
// ff_opus_deemphasis_neon scales this vector by the most recent output
// sample to get that sample's contribution to the next four outputs.
22const tab_st, align=4
23        .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
24endconst
// tab_x0: { 0, c^1, c^2, c^3 } as single-precision bit patterns (c ~= 0.85).
// In ff_opus_deemphasis_neon this is multiplied by input sample 0 of a group
// of four: lane 0 is zero because a sample adds no feedback term to itself,
// lanes 1..3 weight its effect on the three samples that follow it.
25const tab_x0, align=4
26        .word 0x0,        0x3f599a00, 0x3f38f671, 0x3f1d382a
27endconst
// tab_x1: { 0, 0, c^1, c^2 } as single-precision bit patterns (c ~= 0.85).
// In ff_opus_deemphasis_neon this is multiplied by input sample 1 of a group
// of four; only lanes 2 and 3 (the two later samples) receive a contribution.
28const tab_x1, align=4
29        .word 0x0,        0x0,        0x3f599a00, 0x3f38f671
30endconst
// tab_x2: { 0, 0, 0, c^1 } as single-precision bit patterns (c ~= 0.85).
// In ff_opus_deemphasis_neon this is multiplied by input sample 2 of a group
// of four; only lane 3 (the last sample of the group) receives a contribution.
31const tab_x2, align=4
32        .word 0x0,        0x0,        0x0,        0x3f599a00
33endconst
34
// ff_opus_deemphasis_neon
//
// First-order recursive filter over a float buffer, eight samples per loop
// iteration. Algebraically each output is
//         out[i] = in[i] + c * out[i - 1]
// but the recursion is unrolled with the precomputed powers of c held in
// tab_st / tab_x0 / tab_x1 / tab_x2, so four outputs are produced per
// vector operation instead of one at a time.
//
// NOTE(review): presumably declared in C as
//   float ff_opus_deemphasis_neon(float *out, float *in, float coeff, int len)
// -- confirm against the C prototype.
//
// In:    x0       = destination pointer (post-incremented by 32 bytes/iter)
//        x1       = source pointer      (post-incremented by 32 bytes/iter)
//        w2       = number of samples
//        v0.s[0]  = value used as out[-1] for the first iteration
// Out:   s0       = last output sample written
// Clobb: x4, v1-v7, condition flags
//
// The loop is bottom-tested with a step of 8, so it always executes at least
// once and always loads/stores whole groups of 8 floats.
35function ff_opus_deemphasis_neon, export=1
36        movrel  x4, tab_st              // movrel is a macro from the included asm.S
37        ld1    {v4.4s}, [x4]            // v4 = { c^1, c^2, c^3, c^4 }
38        movrel  x4, tab_x0
39        ld1    {v5.4s}, [x4]            // v5 = { 0,   c^1, c^2, c^3 }
40        movrel  x4, tab_x1
41        ld1    {v6.4s}, [x4]            // v6 = { 0,   0,   c^1, c^2 }
42        movrel  x4, tab_x2
43        ld1    {v7.4s}, [x4]            // v7 = { 0,   0,   0,   c^1 }
44
45        fmul v0.4s, v4.4s, v0.s[0]      // v0 = carry-in: previous output times c^1..c^4
46
471:      ld1  {v1.4s, v2.4s}, [x1], #32  // v1 = in[0..3], v2 = in[4..7]
48
        // Two independent accumulation chains are interleaved below:
        //   v0: carry-in plus feedback inside the first group of four
        //   v3: feedback inside the second group of four
49        fmla v0.4s, v5.4s, v1.s[0]      // v0 += in[0] * { 0, c, c^2, c^3 }
50        fmul v3.4s, v7.4s, v2.s[2]      // v3  = in[6] * { 0, 0, 0,   c   }
51
52        fmla v0.4s, v6.4s, v1.s[1]      // v0 += in[1] * { 0, 0, c,   c^2 }
53        fmla v3.4s, v6.4s, v2.s[1]      // v3 += in[5] * { 0, 0, c,   c^2 }
54
55        fmla v0.4s, v7.4s, v1.s[2]      // v0 += in[2] * { 0, 0, 0,   c   }
56        fmla v3.4s, v5.4s, v2.s[0]      // v3 += in[4] * { 0, c, c^2, c^3 }
57
58        fadd v1.4s, v1.4s, v0.4s        // v1 = out[0..3] (final)
59        fadd v2.4s, v2.4s, v3.4s        // v2 = in[4..7] + intra-group feedback
60
61        fmla v2.4s, v4.4s, v1.s[3]      // add carry from out[3]: v2 = out[4..7] (final)
62
63        st1  {v1.4s, v2.4s}, [x0], #32  // store eight outputs
64        fmul v0.4s, v4.4s, v2.s[3]      // carry for the next iteration, from out[7]
65
66        subs w2, w2, #8                 // eight samples consumed
67        b.gt 1b                         // signed test: loop while samples remain
68
69        mov s0, v2.s[3]                 // return the last output sample in s0
70
71        ret
72endfunc
73
// ff_opus_postfilter_neon
//
// In-place symmetric 5-tap filter that reads samples located T positions
// behind the write pointer, four samples per loop iteration:
//
//   data[i] += g0 *  data[i - T]
//            + g1 * (data[i - T - 1] + data[i - T + 1])
//            + g2 * (data[i - T - 2] + data[i - T + 2])
//
// All loads for a group of four samples are issued before that group is
// stored back.
//
// NOTE(review): presumably declared in C as
//   void ff_opus_postfilter_neon(float *data, int period, float *gains, int len)
// -- confirm against the C prototype.
//
// In:    x0 = pointer to the samples to update (post-incremented 16 bytes/iter)
//        w1 = T, the distance in samples back to the centre tap
//        x2 = pointer to g0, g1, g2; 16 bytes are read, the 4th float is unused
//        w3 = number of samples
// Out:   none
// Clobb: x1, v0-v7, condition flags
//
// The loop is bottom-tested with a step of 4, so it always executes at least
// once and always processes whole groups of 4 floats.
74function ff_opus_postfilter_neon, export=1
75        ld1 {v0.4s}, [x2]               // v0 = { g0, g1, g2, (unused) }
76        dup v1.4s, v0.s[1]              // v1 = g1 in every lane
77        dup v2.4s, v0.s[2]              // v2 = g2 in every lane
78        dup v0.4s, v0.s[0]              // v0 = g0 in every lane (done last: overwrites v0)
79
80        add w1, w1, #2                  // T + 2; writing w1 also zeroes the top half of x1
81        sub x1, x0, x1, lsl #2          // x1 = &data[-(T + 2)]  (4 bytes per float)
82
83        ld1 {v3.4s}, [x1]               // data[i - T - 2 ...]
84        fmul v3.4s, v3.4s, v2.4s        // v3 = g2 * data[i - T - 2]; primed for the first iteration
85
        // Four overlapping 16-byte loads, each one float further along.
        // x1 ends up 16 bytes ahead of where it started the iteration.
861:      add x1, x1, #4
87        ld1 {v4.4s}, [x1]               // v4 = data[i - T - 1 ...]
88        add x1, x1, #4
89        ld1 {v5.4s}, [x1]               // v5 = data[i - T     ...]
90        add x1, x1, #4
91        ld1 {v6.4s}, [x1]               // v6 = data[i - T + 1 ...]
92        add x1, x1, #4
93        ld1 {v7.4s}, [x1]               // v7 = data[i - T + 2 ...]
94
95        fmla v3.4s, v7.4s, v2.4s        // v3 = g2 * (data[i-T-2] + data[i-T+2])
96        fadd v6.4s, v6.4s, v4.4s        // v6 = data[i-T+1] + data[i-T-1]
97
98        ld1 {v4.4s}, [x0]               // v4 = data[i ...] (v4 reused; old value already consumed)
99        fmla v4.4s, v5.4s, v0.4s        // v4 += g0 * data[i - T]
100
101        fmul v6.4s, v6.4s, v1.4s        // v6 = g1 * (data[i-T+1] + data[i-T-1])
102        fadd v6.4s, v6.4s, v3.4s        // v6 = g1 term + g2 term
103
104        fadd v4.4s, v4.4s, v6.4s        // v4 = filtered samples
105        fmul v3.4s, v7.4s, v2.4s        // this group's +2 window is the next group's -2 window
106
107        st1  {v4.4s}, [x0], #16         // write back in place, advance four samples
108
109        subs w3, w3, #4                 // four samples consumed
110        b.gt 1b                         // signed test: loop while samples remain
111
112        ret
113endfunc
114