• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_INTRINSICS_INTRINSICS_FLOAT_X86_H_
18 #define BERBERIS_INTRINSICS_INTRINSICS_FLOAT_X86_H_
19 
20 #include <emmintrin.h>
21 #include <immintrin.h>
22 #include <math.h>
23 #include <pmmintrin.h>  // _MM_DENORMALS_ZERO_ON
24 #include <xmmintrin.h>  // _MM_FLUSH_ZERO_ON
25 
26 #include "berberis/base/bit_util.h"
27 #include "berberis/base/logging.h"
28 #include "berberis/intrinsics/guest_fpstate.h"  // FE_HOSTROUND
29 
30 namespace berberis {
31 
32 namespace intrinsics {
33 
template <bool precise_nan_operations_handling>
class ScopedStandardFPSCRValue;

// Scope guard which puts MXCSR into a fixed "standard" state for as long as the object lives.
//
// The guard does not really depend on the operand type, but it is simpler to always create it
// and pick the specialization with the template argument: <true> reprograms MXCSR, <false> is
// an empty class whose construction and destruction do nothing.
template <>
class ScopedStandardFPSCRValue<true> {
 public:
  ScopedStandardFPSCRValue() {
    // Remember the MXCSR value that was active on entry; the destructor merges it back.
    saved_mxcsr_ = _mm_getcsr();
    // All exceptions stay masked (disabled); FTZ and DAZ bits are set.
    constexpr uint32_t kStandardMxcsr = _MM_MASK_MASK | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON;
    _mm_setcsr(kStandardMxcsr);
  }

  ~ScopedStandardFPSCRValue() {
    // Exception flags currently set in MXCSR are kept; every other bit is taken from the value
    // that was saved by the constructor.
    const uint32_t raised_exceptions = _mm_getcsr() & _MM_EXCEPT_MASK;
    _mm_setcsr(raised_exceptions | saved_mxcsr_);
  }

 private:
  // MXCSR contents observed at construction time.
  uint32_t saved_mxcsr_;
};

// No-op variant: nothing is saved, changed or restored.
template <>
class [[maybe_unused]] ScopedStandardFPSCRValue<false> {};
57 
58 #define MAKE_BINARY_OPERATOR(guest_name, operator_name, assignment_name)                \
59                                                                                         \
60   inline Float32 operator operator_name(const Float32& v1, const Float32& v2) {         \
61     Float32 result;                                                                     \
62     asm(#guest_name "ss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
63     return result;                                                                      \
64   }                                                                                     \
65                                                                                         \
66   inline Float32& operator assignment_name(Float32& v1, const Float32& v2) {            \
67     asm(#guest_name "ss %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
68     return v1;                                                                          \
69   }                                                                                     \
70                                                                                         \
71   inline Float64 operator operator_name(const Float64& v1, const Float64& v2) {         \
72     Float64 result;                                                                     \
73     asm(#guest_name "sd %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
74     return result;                                                                      \
75   }                                                                                     \
76                                                                                         \
77   inline Float64& operator assignment_name(Float64& v1, const Float64& v2) {            \
78     asm(#guest_name "sd %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
79     return v1;                                                                          \
80   }
81 
82 MAKE_BINARY_OPERATOR(add, +, +=)
83 MAKE_BINARY_OPERATOR(sub, -, -=)
84 MAKE_BINARY_OPERATOR(mul, *, *=)
85 MAKE_BINARY_OPERATOR(div, /, /=)
86 
87 #undef MAKE_BINARY_OPERATOR
88 
89 inline bool operator<(const Float32& v1, const Float32& v2) {
90   bool result;
91   asm("ucomiss %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
92   return result;
93 }
94 
95 inline bool operator<(const Float64& v1, const Float64& v2) {
96   bool result;
97   asm("ucomisd %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
98   return result;
99 }
100 
101 inline bool operator>(const Float32& v1, const Float32& v2) {
102   bool result;
103   asm("ucomiss %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
104   return result;
105 }
106 
107 inline bool operator>(const Float64& v1, const Float64& v2) {
108   bool result;
109   asm("ucomisd %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
110   return result;
111 }
112 
113 inline bool operator<=(const Float32& v1, const Float32& v2) {
114   bool result;
115   asm("ucomiss %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
116   return result;
117 }
118 
119 inline bool operator<=(const Float64& v1, const Float64& v2) {
120   bool result;
121   asm("ucomisd %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
122   return result;
123 }
124 
125 inline bool operator>=(const Float32& v1, const Float32& v2) {
126   bool result;
127   asm("ucomiss %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
128   return result;
129 }
130 
131 inline bool operator>=(const Float64& v1, const Float64& v2) {
132   bool result;
133   asm("ucomisd %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
134   return result;
135 }
136 
137 inline bool operator==(const Float32& v1, const Float32& v2) {
138   float result;
139   asm("cmpeqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
140   return bit_cast<uint32_t, float>(result) & 0x1;
141 }
142 
143 inline bool operator==(const Float64& v1, const Float64& v2) {
144   double result;
145   asm("cmpeqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
146   return bit_cast<uint64_t, double>(result) & 0x1;
147 }
148 
149 inline bool operator!=(const Float32& v1, const Float32& v2) {
150   float result;
151   asm("cmpneqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
152   return bit_cast<uint32_t, float>(result) & 0x1;
153 }
154 
155 inline bool operator!=(const Float64& v1, const Float64& v2) {
156   double result;
157   asm("cmpneqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
158   return bit_cast<uint64_t, double>(result) & 0x1;
159 }
160 
// It's NOT safe to use ANY functions which return float or double.  That's because the IA32 ABI
// uses the x87 stack to pass arguments (and does that even with -mfpmath=sse), and NaN float and
// double values would be corrupted if pushed on it.
//
// It's safe to use builtins here if this file is compiled with -mfpmath=sse (clang does not have
// such a flag but uses SSE whenever possible; GCC needs both -msse2 and -mfpmath=sse), since
// builtins DON'T use an official calling convention but are instead embedded in the function -
// even if all optimizations are disabled.
169 
// Returns a value with the magnitude of v1 and the sign of v2.
inline Float32 CopySignBit(const Float32& v1, const Float32& v2) {
  Float32 result(__builtin_copysignf(v1.value_, v2.value_));
  return result;
}
173 
// Returns a value with the magnitude of v1 and the sign of v2.
inline Float64 CopySignBit(const Float64& v1, const Float64& v2) {
  Float64 result(__builtin_copysign(v1.value_, v2.value_));
  return result;
}
177 
// Returns the absolute value of v, computed with the compiler builtin rather than a libm call.
inline Float32 Absolute(const Float32& v) {
  Float32 result(__builtin_fabsf(v.value_));
  return result;
}
181 
// Returns the absolute value of v, computed with the compiler builtin rather than a libm call.
inline Float64 Absolute(const Float64& v) {
  Float64 result(__builtin_fabs(v.value_));
  return result;
}
185 
// Returns v with its sign bit flipped.
inline Float32 Negative(const Float32& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float32 negated;
  // Bit 31 is the sign bit of a single-precision value; XOR-ing with it toggles the sign.
  uint64_t sign_mask = 0x80000000U;
  asm("pxor %[mask], %[dst]" : [dst] "=x"(negated.value_) : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
193 
// Returns v with its sign bit flipped.
inline Float64 Negative(const Float64& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float64 negated;
  // Bit 63 is the sign bit of a double-precision value; XOR-ing with it toggles the sign.
  uint64_t sign_mask = 0x8000000000000000ULL;
  asm("pxor %[mask], %[dst]" : [dst] "=x"(negated.value_) : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
201 
FPClassify(const Float32 & v)202 inline FPInfo FPClassify(const Float32& v) {
203   return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
204                                                   static_cast<int>(FPInfo::kInfinite),
205                                                   static_cast<int>(FPInfo::kNormal),
206                                                   static_cast<int>(FPInfo::kSubnormal),
207                                                   static_cast<int>(FPInfo::kZero),
208                                                   v.value_));
209 }
210 
FPClassify(const Float64 & v)211 inline FPInfo FPClassify(const Float64& v) {
212   return static_cast<FPInfo>(__builtin_fpclassify(static_cast<int>(FPInfo::kNaN),
213                                                   static_cast<int>(FPInfo::kInfinite),
214                                                   static_cast<int>(FPInfo::kNormal),
215                                                   static_cast<int>(FPInfo::kSubnormal),
216                                                   static_cast<int>(FPInfo::kZero),
217                                                   v.value_));
218 }
219 
// Rounds `value` to an integral value (kept in floating-point format) using the rounding mode
// selected by `round_control`, which must be one of FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD,
// FE_UPWARD, FE_TOWARDZERO or FE_TIESAWAY.  Any other value is reported via LOG_ALWAYS_FATAL.
inline Float32 FPRound(const Float32& value, uint32_t round_control) {
  Float32 result;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: use the rounding mode currently selected in MXCSR.
      asm("roundss $4,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TONEAREST:
      // Immediate 0: round to nearest (ties to even).
      asm("roundss $0,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_DOWNWARD:
      // Immediate 1: round toward negative infinity.
      asm("roundss $1,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_UPWARD:
      // Immediate 2: round toward positive infinity.
      asm("roundss $2,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      // Immediate 3: truncate.
      asm("roundss $3,%1,%0" : "=x"(result.value_) : "x"(value.value_));
      break;
    case FE_TIESAWAY:
      // Since x86 does not support this rounding mode exactly, we must manually handle the
      // tie-aways (from (-)x.5).  Same scheme as the Float64 overload (see b/146437763).
      if (value == FPRound(value, FE_DOWNWARD)) {
        // Value is already an integer and can be returned as-is. Checking this first avoids dealing
        // with numbers too large to be able to have a fractional part.
        return value;
      } else if (value == FPRound(value, FE_DOWNWARD) + Float32(0.5)) {
        // Fraction part is exactly 1/2, in which case we need to tie-away.
        result = value > Float32(0.0) ? FPRound(value, FE_UPWARD) : FPRound(value, FE_DOWNWARD);
      } else {
        // Any other case can be handled by to-nearest rounding.
        result = FPRound(value, FE_TONEAREST);
      }
      break;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      result.value_ = 0.f;
  }
  return result;
}
252 
// Rounds `value` to an integral value (kept in floating-point format) using the rounding mode
// selected by `round_control`, which must be one of FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD,
// FE_UPWARD, FE_TOWARDZERO or FE_TIESAWAY.  Any other value is reported via LOG_ALWAYS_FATAL.
inline Float64 FPRound(const Float64& value, uint32_t round_control) {
  Float64 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: use the rounding mode currently selected in MXCSR.
      asm("roundsd $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TONEAREST:
      // Immediate 0: round to nearest (ties to even).
      asm("roundsd $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_DOWNWARD:
      // Immediate 1: round toward negative infinity.
      asm("roundsd $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_UPWARD:
      // Immediate 2: round toward positive infinity.
      asm("roundsd $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      // Immediate 3: truncate.
      asm("roundsd $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TIESAWAY: {
      // x86 has no native ties-away mode, so the exact (-)x.5 cases are detected by hand.
      const Float64 floor_value = FPRound(value, FE_DOWNWARD);
      if (value == floor_value) {
        // Already integral: hand it back untouched.  Doing this check first keeps values which
        // are too large to carry a fractional part out of the "+ 0.5" comparison below.
        return value;
      }
      if (value == floor_value + Float64(0.5)) {
        // The fractional part is exactly one half: move away from zero.
        if (value > Float64(0.0)) {
          rounded = FPRound(value, FE_UPWARD);
        } else {
          rounded = FPRound(value, FE_DOWNWARD);
        }
      } else {
        // Everything else rounds the same way as round-to-nearest.
        rounded = FPRound(value, FE_TONEAREST);
      }
      break;
    }
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      rounded.value_ = 0.;
  }
  return rounded;
}
292 
IsNan(const Float32 & v)293 inline int IsNan(const Float32& v) {
294   return __builtin_isnan(v.value_);
295 }
296 
IsNan(const Float64 & v)297 inline int IsNan(const Float64& v) {
298   return __builtin_isnan(v.value_);
299 }
300 
SignBit(const Float32 & v)301 inline int SignBit(const Float32& v) {
302   return __builtin_signbitf(v.value_);
303 }
304 
SignBit(const Float64 & v)305 inline int SignBit(const Float64& v) {
306   return __builtin_signbit(v.value_);
307 }
308 
// Returns the square root of v, computed with the compiler builtin.
inline Float32 Sqrt(const Float32& v) {
  Float32 root(__builtin_sqrtf(v.value_));
  return root;
}
312 
// Returns the square root of v, computed with the compiler builtin.
inline Float64 Sqrt(const Float64& v) {
  Float64 root(__builtin_sqrt(v.value_));
  return root;
}
316 
317 // x*y + z
MulAdd(const Float32 & v1,const Float32 & v2,const Float32 & v3)318 inline Float32 MulAdd(const Float32& v1, const Float32& v2, const Float32& v3) {
319   return Float32(fmaf(v1.value_, v2.value_, v3.value_));
320 }
321 
// Multiply-add: returns v1 * v2 + v3 as computed by fma().
inline Float64 MulAdd(const Float64& v1, const Float64& v2, const Float64& v3) {
  Float64 result(fma(v1.value_, v2.value_, v3.value_));
  return result;
}
325 
// Returns true when IsNan() reports false for every argument, and false as soon as one argument
// is a NaN (arguments are checked left to right, later ones are not inspected after a hit).
//
// A fold expression is used instead of iterating over a braced list of the arguments: it also
// accepts an empty argument list (the result is then true) and arguments of different types,
// and it does not copy the arguments into a temporary list.
template <typename... Srcs>
bool AllAreNotNan(Srcs... srcs) {
  return (!IsNan(srcs) && ...);
}
335 
336 }  // namespace intrinsics
337 
338 }  // namespace berberis
339 
#endif  // BERBERIS_INTRINSICS_INTRINSICS_FLOAT_X86_H_
341