• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include <cassert>
9 #include <cstdio>
10 #include <cstdint>
11 #include "experimental/lowp-basic/QMath.h"
12 
// Compile for x86_64 + ssse3 with:
//     c++ -O3 --std=c++17 -mssse3 experimental/lowp-basic/lowp_experiments.cpp -o lowp
//
// Compile for aarch64 with (macOS):
//     c++ -O3 --std=c++17 -arch arm64 experimental/lowp-basic/lowp_experiments.cpp -o lowp
//
// View assembly:
//     objdump -d lowp | less
21 
// V<N, T> names a clang extended vector with N lanes of element type T.
template <int N, typename T>
using V = T __attribute__((ext_vector_type(N)));
23 
24 #if !defined(__clang__)
25     static_assert(false, "This only works on clang.");
26 #endif
27 
28 #if defined(__SSSE3__)
29     #include <immintrin.h>
30 #endif
31 
32 #if defined(__ARM_NEON)
33     // From section 5.5.5 of the ARM C Language Extensions (ACLE)
34     #include <arm_neon.h>
35 #endif
36 
// Eight 16-bit lanes (128 bits in total); the vector type used by the tests below.
// NOTE(review): the name suggests Q15 fixed-point values held in unsigned lanes --
// confirm against QMath.h.
using Q15 = V<8, uint16_t>;
38 
39 #if defined(__SSSE3__)
test_mm_mulhrs_epi16_simulation()40 static void test_mm_mulhrs_epi16_simulation() {
41     for (int i = -32768; i < 32768; i++) {
42         for (int j = -32768; j < 32768; j++) {
43             Q15 a(i);
44             Q15 b(j);
45             Q15 simResult = simulate_ssse3_mm_mulhrs_epi16(a, b);
46             Q15 intrinsicResult = _mm_mulhrs_epi16(a, b);
47             for (int i = 0; i < 8; i++) {
48                 if (simResult[i] != intrinsicResult[i]) {
49                     printf("simulate_ssse3_mm_mulhrs_epi16 broken\n");
50                     printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
51                            i, a[i], b[i], intrinsicResult[i], simResult[i]);
52                     exit(1);
53                 }
54             }
55         }
56     }
57 }
58 
59 // Use ssse3 to simulate saturating multiply on arm.
ssse3_vqrdmulhq_s16(Q15 a,Q15 b)60 static Q15 ssse3_vqrdmulhq_s16(Q15 a, Q15 b) {
61     constexpr Q15 limit(0x8000);
62     const Q15 product = _mm_mulhrs_epi16(a, b);
63     const Q15 eq = _mm_cmpeq_epi16(product, limit);
64     return _mm_xor_si128(eq, product);
65 }
66 
test_ssse3_vqrdmulhq_s16()67 static void test_ssse3_vqrdmulhq_s16() {
68     for (int i = -32768; i < 32768; i++) {
69         for (int j = -32768; j < 32768; j++) {
70             Q15 a(i);
71             Q15 b(j);
72             Q15 simResult = ssse3_vqrdmulhq_s16(a, b);
73             Q15 realVqrdmulhqS16 = simulate_neon_vqrdmulhq_s16(a, b);
74             for (int i = 0; i < 8; i++) {
75                 if (simResult[i] != realVqrdmulhqS16[i]) {
76                     printf("simulating vqrdmulhq_s16 with ssse3 broken\n");
77                     printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
78                            i, a[i], b[i], realVqrdmulhqS16[i], simResult[i]);
79                     exit(1);
80                 }
81             }
82         }
83     }
84 }
85 
86 #endif
87 
88 #if defined(__ARM_NEON)
test_neon_vqrdmulhq_s16_simulation()89 static void test_neon_vqrdmulhq_s16_simulation() {
90     for (int i = -32768; i < 32768; i++) {
91         for (int j = -32768; j < 32768; j++) {
92             Q15 a(i);
93             Q15 b(j);
94             Q15 simResult = simulate_neon_vqrdmulhq_s16(a, b);
95             Q15 intrinsicResult = vqrdmulhq_s16(a, b);
96             for (int i = 0; i < 8; i++) {
97                 if (simResult[i] != intrinsicResult[i]) {
98                     printf("simulate_neon_vqrdmulhq_s16 broken\n");
99                     printf("i: %d, a: %hx b: %hx, intrinsic: %hx, sim: %hx\n",
100                            i, a[i], b[i], intrinsicResult[i], simResult[i]);
101                     exit(1);
102                 }
103             }
104         }
105     }
106 }
107 #endif
108 
// Runs the exhaustive checks available for the target architecture, then
// prints "Done.". A failing check terminates the process itself via exit(1),
// so reaching the final printf means every enabled check passed.
int main() {
    #ifdef __SSSE3__
        // The mulhrs simulation check is currently commented out.
        //test_mm_mulhrs_epi16_simulation();
        test_ssse3_vqrdmulhq_s16();
    #endif
    #ifdef __ARM_NEON
        test_neon_vqrdmulhq_s16_simulation();
    #endif
    printf("Done.\n");
    return 0;
}
120