1 /* 2 * Copyright (c) 2018-2020, 2022 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_WRAPPER_SUB_H 25 #define ARM_COMPUTE_WRAPPER_SUB_H 26 27 #include <arm_neon.h> 28 29 namespace arm_compute 30 { 31 namespace wrapper 32 { 33 #define VSUB_IMPL(stype, vtype, prefix, postfix) \ 34 inline vtype vsub(const vtype &a, const vtype &b) \ 35 { \ 36 return prefix##_##postfix(a, b); \ 37 } 38 39 VSUB_IMPL(uint8x8_t, uint8x8_t, vsub, u8) 40 VSUB_IMPL(int8x8_t, int8x8_t, vsub, s8) 41 VSUB_IMPL(uint16x4_t, uint16x4_t, vsub, u16) 42 VSUB_IMPL(int16x4_t, int16x4_t, vsub, s16) 43 VSUB_IMPL(uint32x2_t, uint32x2_t, vsub, u32) 44 VSUB_IMPL(int32x2_t, int32x2_t, vsub, s32) 45 VSUB_IMPL(uint64x1_t, uint64x1_t, vsub, u64) 46 VSUB_IMPL(int64x1_t, int64x1_t, vsub, s64) 47 VSUB_IMPL(float32x2_t, float32x2_t, vsub, f32) 48 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 49 VSUB_IMPL(float16x4_t, float16x4_t, vsub, f16) 50 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 51 52 VSUB_IMPL(uint8x16_t, uint8x16_t, vsubq, u8) 53 VSUB_IMPL(int8x16_t, int8x16_t, vsubq, s8) 54 VSUB_IMPL(uint16x8_t, uint16x8_t, vsubq, u16) 55 VSUB_IMPL(int16x8_t, int16x8_t, vsubq, s16) 56 VSUB_IMPL(uint32x4_t, uint32x4_t, vsubq, u32) 57 VSUB_IMPL(int32x4_t, int32x4_t, vsubq, s32) 58 VSUB_IMPL(uint64x2_t, uint64x2_t, vsubq, u64) 59 VSUB_IMPL(int64x2_t, int64x2_t, vsubq, s64) 60 VSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32) 61 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 62 VSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16) 63 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 64 65 #undef VSUB_IMPL 66 67 // VQSUB: Vector saturating sub (No notion of saturation for floating point) 68 #define VQSUB_IMPL(stype, vtype, prefix, postfix) \ 69 inline vtype vqsub(const vtype &a, const vtype &b) \ 70 { \ 71 return prefix##_##postfix(a, b); \ 72 } 73 74 VQSUB_IMPL(uint8x8_t, uint8x8_t, vqsub, u8) 75 VQSUB_IMPL(int8x8_t, int8x8_t, vqsub, s8) 76 VQSUB_IMPL(uint16x4_t, uint16x4_t, vqsub, u16) 77 VQSUB_IMPL(int16x4_t, int16x4_t, vqsub, s16) 78 VQSUB_IMPL(uint32x2_t, uint32x2_t, vqsub, u32) 79 VQSUB_IMPL(int32x2_t, int32x2_t, vqsub, s32) 80 VQSUB_IMPL(uint64x1_t, uint64x1_t, vqsub, u64) 81 VQSUB_IMPL(int64x1_t, int64x1_t, vqsub, s64) 82 VQSUB_IMPL(float32x2_t, float32x2_t, vsub, f32) 83 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 84 VQSUB_IMPL(float16x4_t, float16x4_t, vsub, f16) 85 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 86 87 VQSUB_IMPL(uint8x16_t, uint8x16_t, vqsubq, u8) 88 VQSUB_IMPL(int8x16_t, int8x16_t, vqsubq, s8) 89 VQSUB_IMPL(uint16x8_t, uint16x8_t, vqsubq, u16) 90 VQSUB_IMPL(int16x8_t, int16x8_t, vqsubq, s16) 91 VQSUB_IMPL(uint32x4_t, uint32x4_t, vqsubq, u32) 92 VQSUB_IMPL(int32x4_t, int32x4_t, vqsubq, s32) 93 VQSUB_IMPL(uint64x2_t, uint64x2_t, vqsubq, u64) 94 VQSUB_IMPL(int64x2_t, int64x2_t, vqsubq, s64) 95 VQSUB_IMPL(float32x4_t, float32x4_t, vsubq, f32) 96 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 97 VQSUB_IMPL(float16x8_t, float16x8_t, vsubq, f16) 98 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 99 #undef VQSUB_IMPL 100 101 #define VSUBL_IMPL(rtype, vtype, prefix, postfix) \ 102 inline rtype vsubl(const vtype &a, const vtype &b) \ 103 { \ 104 return prefix##_##postfix(a, b); \ 105 } 106 107 VSUBL_IMPL(int16x8_t, int8x8_t, vsubl, s8) 108 VSUBL_IMPL(int32x4_t, int16x4_t, vsubl, s16) 109 VSUBL_IMPL(int64x2_t, int32x2_t, vsubl, s32) 110 VSUBL_IMPL(uint16x8_t, uint8x8_t, vsubl, u8) 111 VSUBL_IMPL(uint32x4_t, uint16x4_t, vsubl, u16) 112 VSUBL_IMPL(uint64x2_t, uint32x2_t, vsubl, u32) 113 114 #undef VSUB_IMPL 115 116 } // namespace wrapper 117 } // namespace arm_compute 118 #endif /* ARM_COMPUTE_WRAPPER_SUB_H */ 119