1 /* 2 * Copyright (c) 2022 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #ifndef ARM_COMPUTE_WRAPPER_SHR_H 26 #define ARM_COMPUTE_WRAPPER_SHR_H 27 28 #include <arm_neon.h> 29 #include <type_traits> 30 31 namespace arm_compute 32 { 33 namespace wrapper 34 { 35 #define VQRSHRN_IMPL(half_vtype, vtype, prefix, postfix) \ 36 template <int b> \ 37 inline half_vtype vqrshrn(const vtype &a) \ 38 { \ 39 return prefix##_##postfix(a, b); \ 40 } 41 VQRSHRN_IMPL(int8x8_t, int16x8_t, vqrshrn_n, s16) 42 VQRSHRN_IMPL(uint8x8_t, uint16x8_t, vqrshrn_n, u16) 43 VQRSHRN_IMPL(int16x4_t, int32x4_t, vqrshrn_n, s32) 44 VQRSHRN_IMPL(uint16x4_t, uint32x4_t, vqrshrn_n, u32) 45 VQRSHRN_IMPL(int32x2_t, int64x2_t, vqrshrn_n, s64) 46 VQRSHRN_IMPL(uint32x2_t, uint64x2_t, vqrshrn_n, u64) 47 48 #undef VQRSHRN_IMPL 49 50 #ifdef __aarch64__ 51 #define VQRSHRN_SCALAR_IMPL(half_vtype, vtype, prefix, postfix) \ 52 template <int b> \ 53 inline half_vtype vqrshrn(const vtype &a) \ 54 { \ 55 return prefix##_##postfix(a, b); \ 56 } 57 58 VQRSHRN_SCALAR_IMPL(int8_t, int16_t, vqrshrnh_n, s16) 59 VQRSHRN_SCALAR_IMPL(uint8_t, uint16_t, vqrshrnh_n, u16) 60 VQRSHRN_SCALAR_IMPL(int16_t, int32_t, vqrshrns_n, s32) 61 VQRSHRN_SCALAR_IMPL(uint16_t, uint32_t, vqrshrns_n, u32) 62 VQRSHRN_SCALAR_IMPL(int32_t, int64_t, vqrshrnd_n, s64) 63 VQRSHRN_SCALAR_IMPL(uint32_t, uint64_t, vqrshrnd_n, u64) 64 65 #undef VQRSHRN_SCALAR_IMPL 66 #endif // __aarch64__ 67 68 // This function is the mixed version of VQRSHRN and VQRSHRUN. 69 // The input vector is always signed integer, while the returned vector 70 // can be either signed or unsigned depending on the signedness of scalar type T. 71 #define VQRSHRN_EX_IMPL(half_vtype, vtype, prefix_signed, prefix_unsigned, postfix) \ 72 template <int b, typename T> \ 73 inline typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, half_vtype>::type \ 74 vqrshrn_ex(const vtype &a) \ 75 { \ 76 return prefix_signed##_##postfix(a, b); \ 77 } \ 78 \ 79 template <int b, typename T> \ 80 inline typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, u##half_vtype>::type \ 81 vqrshrn_ex(const vtype &a) \ 82 { \ 83 return prefix_unsigned##_##postfix(a, b); \ 84 } 85 VQRSHRN_EX_IMPL(int8x8_t, int16x8_t, vqrshrn_n, vqrshrun_n, s16) 86 VQRSHRN_EX_IMPL(int16x4_t, int32x4_t, vqrshrn_n, vqrshrun_n, s32) 87 VQRSHRN_EX_IMPL(int32x2_t, int64x2_t, vqrshrn_n, vqrshrun_n, s64) 88 #undef VQRSHRN_EX_IMPL 89 90 #define VSHR_IMPL(vtype, prefix, postfix) \ 91 template <int b> \ 92 inline vtype vshr_n(const vtype &a) \ 93 { \ 94 return prefix##_##postfix(a, b); \ 95 } 96 VSHR_IMPL(uint8x8_t, vshr_n, u8) 97 VSHR_IMPL(int8x8_t, vshr_n, s8) 98 #undef VSHR_IMPL 99 100 #define VSHRQ_IMPL(vtype, prefix, postfix) \ 101 template <int b> \ 102 inline vtype vshrq_n(const vtype &a) \ 103 { \ 104 return prefix##_##postfix(a, b); \ 105 } 106 VSHRQ_IMPL(uint32x4_t, vshrq_n, u32) 107 VSHRQ_IMPL(int32x4_t, vshrq_n, s32) 108 #undef VSHRQ_IMPL 109 110 #ifdef __aarch64__ 111 #define VSHRQ_SCALAR_IMPL(vtype, prefix, postfix) \ 112 template <int b> \ 113 inline vtype vshrq_n(const vtype &a) \ 114 { \ 115 return prefix##_##postfix(a, b); \ 116 } 117 VSHRQ_SCALAR_IMPL(uint32_t, vshrd_n, u64) 118 VSHRQ_SCALAR_IMPL(int32_t, vshrd_n, s64) 119 120 #undef VSHRQ_SCALAR_IMPL 121 #endif // __aarch64__ 122 123 #ifdef __aarch64__ 124 #define VQRSHRN_EX_SCALAR_IMPL(half_vtype, vtype, prefix_signed, prefix_unsigned, postfix) \ 125 template <int b, typename T> \ 126 inline typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, half_vtype>::type \ 127 vqrshrn_ex(const vtype &a) \ 128 { \ 129 return prefix_signed##_##postfix(a, b); \ 130 } \ 131 \ 132 template <int b, typename T> \ 133 inline typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, u##half_vtype>::type \ 134 vqrshrn_ex(const vtype &a) \ 135 { \ 136 return prefix_unsigned##_##postfix(a, b); \ 137 } 138 139 VQRSHRN_EX_SCALAR_IMPL(int8_t, int16_t, vqrshrnh_n, vqrshrunh_n, s16) 140 VQRSHRN_EX_SCALAR_IMPL(int16_t, int32_t, vqrshrns_n, vqrshruns_n, s32) 141 VQRSHRN_EX_SCALAR_IMPL(int32_t, int64_t, vqrshrnd_n, vqrshrund_n, s64) 142 143 #undef VQRSHRN_EX_IMPL 144 #endif // __aarch64__ 145 146 } // namespace wrapper 147 } // namespace arm_compute 148 #endif /* ARM_COMPUTE_WRAPPER_SHR_H */ 149