• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef QMath_DEFINED
9 #define QMath_DEFINED
10 
// V<N, T> is an N-lane vector whose elements have type T, built on Clang's
// ext_vector_type attribute.
11 template <int N, typename T> using V = T __attribute__((ext_vector_type(N)));
12 
// Stop the build with a readable message on any compiler other than Clang.
13 #if !defined(__clang__)
14 static_assert(false, "This only works on clang.");
15 #endif
16 
17 #if defined(__SSSE3__)
18 #include <immintrin.h>
19 #endif
20 
21 #if defined(__ARM_NEON)
22 // From section 5.5.5 of the ARM C Language Extensions (ACLE)
23     #include <arm_neon.h>
24 #endif
25 
26 #include <cassert>
27 #include <cstdint>
28 
// 8-lane, 16-bit vector types used by the routines in this file.
// Q15 and U16 name the same underlying type (8 x uint16_t); the separate Q15 name presumably
// marks lanes that hold Q15 fixed-point values -- TODO confirm against callers.
29 using Q15 = V<8, uint16_t>;
// 8 lanes of int16_t.
30 using I16 = V<8, int16_t>;
// 8 lanes of uint16_t.
31 using U16 = V<8, uint16_t>;
32 
33 
constrained_add(I16 a,U16 b)34 static inline U16 constrained_add(I16 a, U16 b) {
35 for (size_t i = 0; i < 8; i++) {
36     // Ensure that a + b is on the interval [0, UINT16_MAX]
37     assert(-b[i] <= a[i] && a[i] <= UINT16_MAX - b[i]);
38 }
39     U16 answer = b + a;
40     return answer;
41 }
42 
43 // A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;
simulate_ssse3_mm_mulhrs_epi16(I16 a,I16 b)44 static inline I16 simulate_ssse3_mm_mulhrs_epi16(I16 a, I16 b) {
45     I16 result;
46     auto m = [](int16_t r, int16_t s) {
47         const int32_t rounding = 1 << 14;
48         int32_t temp = (int32_t)r * (int32_t)s + rounding;
49         return (int16_t)(temp >> 15);
50     };
51     for (int i = 0; i < 8; i++) {
52         result[i] = m(a[i], b[i]);
53     }
54     return result;
55 }
56 
57 // A pure C version of the neon intrinsic vqrdmulhq_s16;
simulate_neon_vqrdmulhq_s16(Q15 a,Q15 b)58 static inline Q15 simulate_neon_vqrdmulhq_s16(Q15 a, Q15 b) {
59     Q15 result;
60     const int esize = 16;
61     auto m = [](int16_t r, int16_t s) {
62         const int64_t rounding = 1 << (esize - 1);
63         int64_t product = 2LL * (int64_t)r * (int64_t)s + rounding;
64         int64_t result = product >> esize;
65 
66         // Saturate the result
67         if (int64_t limit =  (1LL << (esize - 1)) - 1; result > limit) { result = limit; }
68         if (int64_t limit = -(1LL << (esize - 1))    ; result < limit) { result = limit; }
69         return result;
70     };
71     for (int i = 0; i < 8; i++) {
72         result[i] = m(a[i], b[i]);
73     }
74     return result;
75 }
76 
77 #endif  // QMath_DEFINED
78