• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2023, Alliance for Open Media. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
12 #define AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
13 
14 #include <arm_neon_sve_bridge.h>
15 
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/aom_config.h"
18 
19 // We can access instructions exclusive to the SVE instruction set from a
20 // predominantly Neon context by making use of the Neon-SVE bridge intrinsics
21 // to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
22 // vector (if it's longer than 128 bits) being "don't care".
23 
24 // While sub-optimal on machines that have SVE vector length > 128-bit - as the
25 // remainder of the vector is unused - this approach is still beneficial when
26 // compared to a Neon-only solution.
27 
aom_udotq_u16(uint64x2_t acc,uint16x8_t x,uint16x8_t y)28 static INLINE uint64x2_t aom_udotq_u16(uint64x2_t acc, uint16x8_t x,
29                                        uint16x8_t y) {
30   return svget_neonq_u64(svdot_u64(svset_neonq_u64(svundef_u64(), acc),
31                                    svset_neonq_u16(svundef_u16(), x),
32                                    svset_neonq_u16(svundef_u16(), y)));
33 }
34 
aom_sdotq_s16(int64x2_t acc,int16x8_t x,int16x8_t y)35 static INLINE int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
36   return svget_neonq_s64(svdot_s64(svset_neonq_s64(svundef_s64(), acc),
37                                    svset_neonq_s16(svundef_s16(), x),
38                                    svset_neonq_s16(svundef_s16(), y)));
39 }
40 
// Indexed signed 16-bit dot product accumulating into 64-bit lanes.
//
//   sum  - int64x2_t accumulator.
//   s0   - int16x8_t source; each group of four elements feeds one 64-bit
//          lane of the result.
//   f    - int16x8_t holding two groups of four 16-bit elements.
//   lane - which group of `f` (0 or 1) every group of `s0` is multiplied by.
//
// This is a macro rather than an inline function because the underlying
// intrinsic requires `lane` to be an integer constant expression.
#define aom_svdot_lane_s16(sum, s0, f, lane)                 \
  svget_neonq_s64(                                           \
      svdot_lane_s64(svset_neonq_s64(svundef_s64(), (sum)),  \
                     svset_neonq_s16(svundef_s16(), (s0)),   \
                     svset_neonq_s16(svundef_s16(), (f)),    \
                     (lane)))
45 
aom_tbl_u16(uint16x8_t s,uint16x8_t tbl)46 static INLINE uint16x8_t aom_tbl_u16(uint16x8_t s, uint16x8_t tbl) {
47   return svget_neonq_u16(svtbl_u16(svset_neonq_u16(svundef_u16(), s),
48                                    svset_neonq_u16(svundef_u16(), tbl)));
49 }
50 
aom_tbl_s16(int16x8_t s,uint16x8_t tbl)51 static INLINE int16x8_t aom_tbl_s16(int16x8_t s, uint16x8_t tbl) {
52   return svget_neonq_s16(svtbl_s16(svset_neonq_s16(svundef_s16(), s),
53                                    svset_neonq_u16(svundef_u16(), tbl)));
54 }
55 
56 #endif  // AOM_AOM_DSP_ARM_AOM_NEON_SVE_BRIDGE_H_
57