1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
18 #define BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
19
20 #include <cstdint>
21 #include <cstring>
22 #include <tuple>
23
24 extern "C" uint64_t get_fp64_literal();
25
// Reinterprets the object representation of `source` as a value of type Dest.
//
// The bytes of `source` are copied into a default-initialized Dest, so no
// aliasing rules are violated. Both types must have exactly the same size;
// a mismatch is rejected at compile time. Callers are expected to use
// trivially copyable types only (byte-wise copying is meaningless otherwise).
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  static_assert(sizeof(Dest) == sizeof(Source));
  Dest dest;
  // <cstring> only guarantees the std:: qualified name, so do not rely on the
  // global-namespace memcpy being declared.
  std::memcpy(&dest, &source, sizeof(dest));
  return dest;
}
33
MakeF32x4(float f1,float f2,float f3,float f4)34 inline __uint128_t MakeF32x4(float f1, float f2, float f3, float f4) {
35 float array[] = {f1, f2, f3, f4};
36 return bit_cast<__uint128_t>(array);
37 }
38
MakeF64x2(double d1,double d2)39 inline __uint128_t MakeF64x2(double d1, double d2) {
40 double array[] = {d1, d2};
41 return bit_cast<__uint128_t>(array);
42 }
43
// Combines two 64-bit halves into one 128-bit integer: `high` supplies
// bits 127..64 and `low` supplies bits 63..0.
constexpr __uint128_t MakeUInt128(uint64_t low, uint64_t high) {
  __uint128_t value = high;
  value <<= 64;
  value |= low;
  return value;
}
47
// Packs four 32-bit values into one 128-bit integer. u0 lands in bits 31..0,
// u1 in bits 63..32, u2 in bits 95..64 and u3 in bits 127..96.
constexpr __uint128_t MakeU32x4(uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) {
  const uint64_t low_half = (static_cast<uint64_t>(u1) << 32) | u0;
  const uint64_t high_half = (static_cast<uint64_t>(u3) << 32) | u2;
  return (static_cast<__uint128_t>(high_half) << 64) | low_half;
}
52
// Floating-point literals, expressed as the raw integer bit patterns of the
// corresponding 32-bit / 64-bit floating-point values.
constexpr uint32_t kOneF32AsInteger = 0x3f800000U;
constexpr uint64_t kOneF64AsInteger = 0x3ff0000000000000ULL;
constexpr uint32_t kDefaultNaN32AsInteger = 0x7fc00000U;
constexpr uint64_t kDefaultNaN64AsInteger = 0x7ff8000000000000ULL;
constexpr uint32_t kQuietNaN32AsInteger = kDefaultNaN32AsInteger;
constexpr uint64_t kQuietNaN64AsInteger = kDefaultNaN64AsInteger;
// Negative quiet NaNs: the default NaN with the sign (most significant) bit flipped.
constexpr uint32_t kNegativeQuietNaN32AsInteger = kDefaultNaN32AsInteger ^ 0x80000000U;
// The sign mask must be the hexadecimal literal 0x8000000000000000 (bit 63); a
// decimal 8000000000000000 would flip unrelated mantissa/exponent bits instead.
// The name (without the "AsInteger" suffix) is kept as-is for existing users.
constexpr uint64_t kNegativeQuietNaN64 = kDefaultNaN64AsInteger ^ 0x8000000000000000ULL;
// There are multiple quiet and signaling NaNs. These are the ones that have the LSB "on".
constexpr uint32_t kSignalingNaN32AsInteger_1 = 0x7f800001U;
constexpr uint64_t kSignalingNaN64AsInteger_1 = 0x7ff0000000000001ULL;
constexpr uint32_t kQuietNaN32AsInteger_1 = kQuietNaN32AsInteger | 1;
constexpr uint64_t kQuietNaN64AsInteger_1 = kQuietNaN64AsInteger | 1;
67
// FPCR (floating-point control register) bit masks. Positions follow the FPCR
// layout in the Arm Architecture Reference Manual.
constexpr uint32_t kFpcrFzBit = 1U << 24;  // FZ field.
constexpr uint32_t kFpcrDnBit = 1U << 25;  // DN field.
// Values of the two-bit RMode field, which starts at bit 22.
constexpr uint32_t kFpcrRModeTieEven = 0U << 22;
constexpr uint32_t kFpcrRModePosInf = 1U << 22;
constexpr uint32_t kFpcrRModeNegInf = 2U << 22;
constexpr uint32_t kFpcrRModeZero = 3U << 22;
// Exception-related enable bits (IDE, IXE, UFE, OFE, DZE, IOE).
constexpr uint32_t kFpcrIdeBit = 1U << 15;
constexpr uint32_t kFpcrIxeBit = 1U << 12;
constexpr uint32_t kFpcrUfeBit = 1U << 11;
constexpr uint32_t kFpcrOfeBit = 1U << 10;
constexpr uint32_t kFpcrDzeBit = 1U << 9;
constexpr uint32_t kFpcrIoeBit = 1U << 8;
80
// FPSR (floating-point status register) bit masks.
constexpr uint32_t kFpsrQcBit = 1U << 27;  // QC field.
constexpr uint32_t kFpsrIdcBit = 1U << 7;  // Input Denormal cumulative exception flag.
constexpr uint32_t kFpsrIxcBit = 1U << 4;  // Inexact cumulative exception flag.
constexpr uint32_t kFpsrUfcBit = 1U << 3;  // Underflow cumulative exception flag.
constexpr uint32_t kFpsrOfcBit = 1U << 2;  // Overflow cumulative exception flag.
constexpr uint32_t kFpsrDzcBit = 1U << 1;  // Division by Zero cumulative exception flag.
constexpr uint32_t kFpsrIocBit = 1U << 0;  // Invalid Operation cumulative exception flag.
88
// Expands to a captureless lambda taking no arguments. It executes the
// instruction string ASM and returns the 128-bit value ASM leaves in
// operand %0, which is bound to a "w"-constrained output register.
#define ASM_INSN_WRAP_FUNC_W_RES(ASM) \
  []() -> __uint128_t {               \
    __uint128_t result;               \
    asm(ASM : "=w"(result));          \
    return result;                    \
  }
95
// Expands to a captureless lambda taking one 128-bit argument. ASM sees the
// argument as operand %1 ("w" register) and must write its 64-bit result to
// operand %0 ("r" register); the lambda returns that result.
#define ASM_INSN_WRAP_FUNC_R_RES_W_ARG(ASM) \
  [](__uint128_t input) -> uint64_t {       \
    uint64_t result;                        \
    asm(ASM : "=r"(result) : "w"(input));   \
    return result;                          \
  }
102
// Expands to a captureless lambda taking one 64-bit argument. ASM sees the
// argument as operand %1 ("r" register) and must write its 128-bit result to
// operand %0 ("w" register); the lambda returns that result.
#define ASM_INSN_WRAP_FUNC_W_RES_R_ARG(ASM) \
  [](uint64_t input) -> __uint128_t {       \
    __uint128_t result;                     \
    asm(ASM : "=w"(result) : "r"(input));   \
    return result;                          \
  }
109
// Expands to a captureless lambda taking one 128-bit argument. ASM sees the
// argument as operand %1 and must write its 128-bit result to operand %0;
// both operands use "w"-constrained registers.
#define ASM_INSN_WRAP_FUNC_W_RES_W_ARG(ASM) \
  [](__uint128_t input) -> __uint128_t {    \
    __uint128_t result;                     \
    asm(ASM : "=w"(result) : "w"(input));   \
    return result;                          \
  }
116
// Expands to a captureless lambda taking two 128-bit arguments. ASM sees them
// as operands %1 and %2 (in argument order) and must write its 128-bit result
// to operand %0; all operands use "w"-constrained registers.
#define ASM_INSN_WRAP_FUNC_W_RES_WW_ARG(ASM)            \
  [](__uint128_t lhs, __uint128_t rhs) -> __uint128_t { \
    __uint128_t result;                                 \
    asm(ASM : "=w"(result) : "w"(lhs), "w"(rhs));       \
    return result;                                      \
  }
123
// Expands to a captureless lambda taking two 128-bit arguments. The first
// argument is operand %1. The second argument is tied to the output through
// the "0" constraint, so operand %0 holds it on entry and the result on exit.
#define ASM_INSN_WRAP_FUNC_W_RES_W0_ARG(ASM)                    \
  [](__uint128_t source, __uint128_t in_out) -> __uint128_t {   \
    __uint128_t result;                                         \
    asm(ASM : "=w"(result) : "w"(source), "0"(in_out));         \
    return result;                                              \
  }
130
// Expands to a captureless lambda taking three 128-bit arguments. ASM sees
// them as operands %1, %2 and %3 (in argument order) and must write its
// 128-bit result to operand %0; all operands use "w"-constrained registers.
#define ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG(ASM)                              \
  [](__uint128_t first, __uint128_t second, __uint128_t third)             \
      -> __uint128_t {                                                     \
    __uint128_t result;                                                    \
    asm(ASM : "=w"(result) : "w"(first), "w"(second), "w"(third));         \
    return result;                                                         \
  }
137
// Expands to a captureless lambda taking three 128-bit arguments. The first
// two are operands %1 and %2. The third is tied to the output through the "0"
// constraint, so operand %0 holds it on entry and the result on exit.
#define ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG(ASM)                              \
  [](__uint128_t first, __uint128_t second, __uint128_t in_out)            \
      -> __uint128_t {                                                     \
    __uint128_t result;                                                    \
    asm(ASM : "=w"(result) : "w"(first), "w"(second), "0"(in_out));        \
    return result;                                                         \
  }
144
145 // clang-format off
146 // We turn off clang-format here because it would place ASM like so:
147 //
148 // asm("msr fpsr, xzr\n\t" ASM
149 // "\n\t"
150 // "mrs %1, fpsr"
151 // : "=w"(res), "=r"(fpsr)
152 // : "w"(arg));
// Expands to a captureless lambda taking one 128-bit argument and returning
// {result, fpsr}. FPSR is zeroed before ASM runs and read back right after it.
// Operand numbering seen by ASM: %0 = result ("w"), %1 = FPSR read-back
// register (used by the wrapper), %2 = the argument ("w").
#define ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG(ASM)                          \
  [](__uint128_t input) -> std::tuple<__uint128_t, uint32_t> {        \
    __uint128_t result;                                               \
    uint64_t status;                                                  \
    asm("msr fpsr, xzr\n\t"                                           \
        ASM "\n\t"                                                    \
        "mrs %1, fpsr"                                                \
        : "=w"(result), "=r"(status)                                  \
        : "w"(input));                                                \
    return {result, status};                                          \
  }
164
// Expands to a captureless lambda taking two 128-bit arguments and returning
// {result, fpsr}. FPSR is zeroed before ASM runs and read back right after it.
// Operand numbering seen by ASM: %0 = result, pre-loaded with the second
// argument via the "0" constraint; %1 = FPSR read-back register (used by the
// wrapper); %2 = the first argument ("w").
#define ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG(ASM)                                       \
  [](__uint128_t source, __uint128_t in_out) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t result;                                                             \
    uint64_t status;                                                                \
    asm("msr fpsr, xzr\n\t"                                                         \
        ASM "\n\t"                                                                  \
        "mrs %1, fpsr"                                                              \
        : "=w"(result), "=r"(status)                                                \
        : "w"(source), "0"(in_out));                                                \
    return {result, status};                                                        \
  }
176
// Expands to a captureless lambda taking two 128-bit arguments and returning
// {result, fpsr}. FPSR is zeroed before ASM runs and read back right after it.
// Operand numbering seen by ASM: %0 = result ("w"), %1 = FPSR read-back
// register (used by the wrapper), %2 and %3 = the arguments in order ("w").
#define ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG(ASM)                                 \
  [](__uint128_t lhs, __uint128_t rhs) -> std::tuple<__uint128_t, uint32_t> { \
    __uint128_t result;                                                       \
    uint64_t status;                                                          \
    asm("msr fpsr, xzr\n\t"                                                   \
        ASM "\n\t"                                                            \
        "mrs %1, fpsr"                                                        \
        : "=w"(result), "=r"(status)                                          \
        : "w"(lhs), "w"(rhs));                                                \
    return {result, status};                                                  \
  }
188
// Expands to a captureless lambda taking a 128-bit argument and an FPCR value.
// The FPCR value is written to FPCR before ASM runs; afterwards FPCR is set to
// zero (it is not restored to its previous contents).
// Operand numbering seen by ASM: %0 = result ("w"), %1 = the argument ("w"),
// %2 = the FPCR value (used by the wrapper).
#define ASM_INSN_WRAP_FUNC_W_RES_WC_ARG(ASM)                     \
  [](__uint128_t input, uint32_t control) -> __uint128_t {       \
    __uint128_t result;                                          \
    asm("msr fpcr, %x2\n\t"                                      \
        ASM "\n\t"                                               \
        "msr fpcr, xzr"                                          \
        : "=w"(result)                                           \
        : "w"(input), "r"(control));                             \
    return result;                                               \
  }
199
// Expands to a captureless lambda taking three 128-bit arguments and returning
// {result, fpsr}. FPSR is zeroed before ASM runs and read back right after it.
// Operand numbering seen by ASM: %0 = result, pre-loaded with the third
// argument via the "0" constraint; %1 = FPSR read-back register (used by the
// wrapper); %2 and %3 = the first and second arguments ("w").
#define ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG(ASM)                             \
  [](__uint128_t first, __uint128_t second, __uint128_t in_out)            \
      -> std::tuple<__uint128_t, uint32_t> {                               \
    __uint128_t result;                                                    \
    uint64_t status;                                                       \
    asm("msr fpsr, xzr\n\t"                                                \
        ASM "\n\t"                                                         \
        "mrs %1, fpsr"                                                     \
        : "=w"(result), "=r"(status)                                       \
        : "w"(first), "w"(second), "0"(in_out));                           \
    return {result, status};                                               \
  }
211
// Expands to a captureless lambda taking two 128-bit arguments and an FPCR
// value. The FPCR value is written to FPCR before ASM runs; afterwards FPCR is
// set to zero (it is not restored to its previous contents).
// Operand numbering seen by ASM: %0 = result ("w"), %1 and %2 = the arguments
// in order ("w"), %3 = the FPCR value (used by the wrapper).
#define ASM_INSN_WRAP_FUNC_W_RES_WWC_ARG(ASM)                                 \
  [](__uint128_t lhs, __uint128_t rhs, uint32_t control) -> __uint128_t {     \
    __uint128_t result;                                                       \
    asm("msr fpcr, %x3\n\t"                                                   \
        ASM "\n\t"                                                            \
        "msr fpcr, xzr"                                                       \
        : "=w"(result)                                                        \
        : "w"(lhs), "w"(rhs), "r"(control));                                  \
    return result;                                                            \
  }
222
223 // clang-format on
224
225 #endif // BERBERIS_TESTS_INLINE_ASM_TESTS_UTILITY_H_
226