• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * ARM NEON optimised Format Conversion Utils
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 * Copyright (c) 2015 Janne Grunau  <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23#include "config.h"
24#include "libavutil/aarch64/asm.S"
25
// Convert int32 samples to float and scale every one by the same factor.
//
// Register use as read from the code (AAPCS64 argument registers):
//   x0      = destination pointer, written 32 bytes (8 floats) at a time
//   x1      = source pointer, read 32 bytes (8 int32) at a time
//   v0.s[0] = scale factor applied to every converted sample
//   w2      = element count, consumed 8 per iteration
// Presumably this matches a C prototype along the lines of
//   void f(float *dst, const int32_t *src, float mul, int len);
// -- confirm against the C declaration.
//
// A positive count is rounded up to a whole block of 8, so both buffers
// must have room for that. A count <= 0 returns without touching memory
// (previously one block of 8 was always loaded and stored).
// Clobbers: x0, x1, w2, v1-v4, flags.
function ff_int32_to_float_fmul_scalar_neon, export=1
        cmp             w2,  #0
        b.le            3f                          // nothing to convert
        ld1             {v1.4s,v2.4s}, [x1], #32    // preload the first 8 samples
        scvtf           v1.4s,  v1.4s
        scvtf           v2.4s,  v2.4s
1:
        subs            w2,  w2,  #8                // flags are consumed by b.le below
        fmul            v3.4s,  v1.4s,  v0.s[0]
        fmul            v4.4s,  v2.4s,  v0.s[0]
        b.le            2f                          // that was the last block
        ld1             {v1.4s,v2.4s}, [x1], #32    // fetch the next block before storing this one
        st1             {v3.4s,v4.4s}, [x0], #32
        scvtf           v1.4s,  v1.4s
        scvtf           v2.4s,  v2.4s
        b               1b
2:
        st1             {v3.4s,v4.4s}, [x0]         // final block, no pointer write-back needed
3:
        ret
endfunc
44
// Convert int32 samples to float in blocks of 8, scaling each block by its
// own factor read from an array.
//
// Register use as read from the code (AAPCS64 argument registers):
//   x0 = not referenced here (presumably a context pointer -- confirm)
//   x1 = destination pointer
//   x2 = source pointer
//   x3 = pointer to the scale factors, one float per block of 8 samples
//   w4 = element count; only whole blocks (w4 >> 3) are processed
//
// The main loop handles two blocks per iteration; an odd block count is
// finished by the single-block tail at label 1.
// A count below 8 returns without touching memory (previously it fell
// into the two-block loop and converted 16 samples).
// Clobbers: x1-x3, w4, w5, v0-v7, v16, flags.
function ff_int32_to_float_fmul_array8_neon, export=1
        lsr             w4,  w4,  #3                // w4 = number of 8-sample blocks
        cbz             w4,  3f                     // no complete block: nothing to do
        subs            w5,  w4,  #1                // w5 = blocks remaining - 1
        b.eq            1f                          // exactly one block: tail only
2:
        ld1             {v0.4s,v1.4s}, [x2], #32    // first block of the pair
        ld1             {v2.4s,v3.4s}, [x2], #32    // second block of the pair
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        ld1             {v16.2s},  [x3], #8         // two scale factors, one per block
        scvtf           v2.4s,  v2.4s
        scvtf           v3.4s,  v3.4s
        fmul            v4.4s,  v0.4s,  v16.s[0]
        fmul            v5.4s,  v1.4s,  v16.s[0]
        fmul            v6.4s,  v2.4s,  v16.s[1]
        fmul            v7.4s,  v3.4s,  v16.s[1]
        st1             {v4.4s,v5.4s}, [x1], #32
        st1             {v6.4s,v7.4s}, [x1], #32
        subs            w5,  w5,  #2
        b.gt            2b                          // at least two blocks left
        b.eq            1f                          // exactly one block left
3:
        ret
1:
        ld1             {v0.4s,v1.4s}, [x2]         // single trailing block
        ld1             {v16.s}[0],  [x3]           // its scale factor
        scvtf           v0.4s,  v0.4s
        scvtf           v1.4s,  v1.4s
        fmul            v4.4s,  v0.4s,  v16.s[0]
        fmul            v5.4s,  v1.4s,  v16.s[0]
        st1             {v4.4s,v5.4s}, [x1]
        ret
endfunc
77