/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "libavutil/aarch64/asm.S"
22
/*
 * ff_vorbis_inverse_coupling_neon
 *
 * In-place update of two arrays of 32-bit floats, four lanes per step.
 *
 * In:
 *   x0 = first array, read and written  (presumably the Vorbis magnitude
 *        vector -- confirm against the C prototype)
 *   x1 = second array, read and written (presumably the Vorbis angle
 *        vector -- confirm against the C prototype)
 *   x2 = number of floats to process
 *
 * Per lane, with m = x0[i], a = x1[i] and s = a ^ (m & 0x80000000),
 * i.e. a with its sign flipped when the sign bit of m is set:
 *   x0[i] = m + (a <= 0 ? s : 0)
 *   x1[i] = m - (a <= 0 ? 0 : s)
 * The "a <= 0" test is a signed integer compare on the raw bit pattern
 * (cmle), not a floating-point compare.
 *
 * NOTE(review): m = +0.0 has a clear sign bit and therefore takes the same
 * path as a positive m; confirm that this matches the reference C code.
 *
 * The main path is software-pipelined: loads go through x0/x1, stores go
 * through the copies x3/x4 and run one vector behind, and the register
 * pairs v7/v6 and v1/v0 alternate as "computed, waiting to be stored".
 *
 * The code works in whole 4-float vectors only and has no guard for small
 * counts: x2 = 4 runs the tail path alone, every other value enters the
 * pipelined path, which touches at least 8 floats.  Callers are assumed to
 * pass a positive multiple of 4 -- TODO confirm.
 *
 * Clobbers: x0-x4, v0-v7, v20, condition flags.
 */
function ff_vorbis_inverse_coupling_neon, export=1
        movi            v20.4s,  #1<<7, lsl #24         // 0x80000000 per lane: float sign bit
        subs            x2,  x2,  #4                    // account for the first vector, set flags
        mov             x3,  x0                         // x3 = store pointer into the x0 array
        mov             x4,  x1                         // x4 = store pointer into the x1 array
        b.eq            3f                              // exactly one vector: tail path only

        // Prologue: load and compute vector 0 into v7/v6.  It is stored at
        // the top of the loop, after the next loads have been issued.
        ld1             {v7.4s},  [x1], #16             // a
        ld1             {v6.4s},  [x0], #16             // m
        cmle            v4.4s,  v7.4s,  #0              // mask: all-ones where a <= 0 (integer)
        and             v5.16b, v6.16b, v20.16b         // sign bit of m
        eor             v7.16b, v7.16b, v5.16b          // s = a ^ sign(m)
        and             v2.16b, v7.16b, v4.16b          // s where a <= 0, else 0
        bic             v3.16b, v7.16b, v4.16b          // s where a >  0, else 0
        fadd            v7.4s,  v6.4s,  v2.4s           // new x0[] value
        fsub            v6.4s,  v6.4s,  v3.4s           // new x1[] value
        // First loop half: compute into v1/v0 while storing the v7/v6 results.
1:      ld1             {v1.4s},  [x1], #16
        ld1             {v0.4s},  [x0], #16
        cmle            v4.4s,  v1.4s,  #0
        and             v5.16b, v0.16b, v20.16b
        eor             v1.16b, v1.16b, v5.16b
        st1             {v7.4s},  [x3], #16             // store previous vector (x0 array)
        st1             {v6.4s},  [x4], #16             // store previous vector (x1 array)
        and             v2.16b, v1.16b, v4.16b
        bic             v3.16b, v1.16b, v4.16b
        fadd            v1.4s,  v0.4s,  v2.4s
        fsub            v0.4s,  v0.4s,  v3.4s
        // x2 becomes (floats left after this vector) - 4:
        //   > 0  at least two more vectors, keep the pipeline running
        //   = 0  exactly one more vector, handled by the tail at 3
        //   < 0  nothing left
        subs            x2,  x2,  #8
        b.le            2f
        // Second loop half: compute into v7/v6 while storing the v1/v0 results.
        ld1             {v7.4s},  [x1], #16
        ld1             {v6.4s},  [x0], #16
        cmle            v4.4s,  v7.4s,  #0
        and             v5.16b, v6.16b, v20.16b
        eor             v7.16b, v7.16b, v5.16b
        st1             {v1.4s},  [x3], #16
        st1             {v0.4s},  [x4], #16
        and             v2.16b, v7.16b, v4.16b
        bic             v3.16b, v7.16b, v4.16b
        fadd            v7.4s,  v6.4s,  v2.4s
        fsub            v6.4s,  v6.4s,  v3.4s
        b               1b

        // Drain the pipeline.  st1 leaves the flags alone, so b.lt still
        // tests the result of the subs in the first loop half.
2:      st1             {v1.4s},  [x3], #16
        st1             {v0.4s},  [x4], #16
        b.lt            4f                              // x2 < 0: no vector left

        // Tail: one last vector, not pipelined; loads and stores both use x0/x1.
3:      ld1             {v1.4s},  [x1]
        ld1             {v0.4s},  [x0]
        cmle            v4.4s,  v1.4s,  #0
        and             v5.16b, v0.16b, v20.16b
        eor             v1.16b, v1.16b, v5.16b
        and             v2.16b, v1.16b, v4.16b
        bic             v3.16b, v1.16b, v4.16b
        fadd            v1.4s,  v0.4s,  v2.4s
        fsub            v0.4s,  v0.4s,  v3.4s
        st1             {v1.4s},  [x0], #16             // post-increment kept from the original;
        st1             {v0.4s},  [x1], #16             // x0/x1 are not read again before ret
        // Numeric local label: a label named after the ret mnemonic is easy
        // to misread and did not match the other labels in this function.
4:
        ret
endfunc
83