• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
18 #define ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
19 
20 #include <cmath>
21 
22 #include "berberis/base/bit_util.h"
23 #include "berberis/base/logging.h"
24 #include "berberis/intrinsics/common/intrinsics_float.h"  // Float32/Float64
25 #include "berberis/intrinsics/guest_rounding_modes.h"     // FE_HOSTROUND/FE_TIESAWAY
26 
27 namespace berberis::intrinsics {
28 
29 #define MAKE_BINARY_OPERATOR(guest_name, operator_name, assignment_name)                \
30                                                                                         \
31   inline Float32 operator operator_name(const Float32& v1, const Float32& v2) {         \
32     Float32 result;                                                                     \
33     asm(#guest_name "ss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
34     return result;                                                                      \
35   }                                                                                     \
36                                                                                         \
37   inline Float32& operator assignment_name(Float32 & v1, const Float32 & v2) {          \
38     asm(#guest_name "ss %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
39     return v1;                                                                          \
40   }                                                                                     \
41                                                                                         \
42   inline Float64 operator operator_name(const Float64& v1, const Float64& v2) {         \
43     Float64 result;                                                                     \
44     asm(#guest_name "sd %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
45     return result;                                                                      \
46   }                                                                                     \
47                                                                                         \
48   inline Float64& operator assignment_name(Float64 & v1, const Float64 & v2) {          \
49     asm(#guest_name "sd %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_));     \
50     return v1;                                                                          \
51   }
52 
53 MAKE_BINARY_OPERATOR(add, +, +=)
54 MAKE_BINARY_OPERATOR(sub, -, -=)
55 MAKE_BINARY_OPERATOR(mul, *, *=)
56 MAKE_BINARY_OPERATOR(div, /, /=)
57 
58 #undef MAKE_BINARY_OPERATOR
59 
60 inline bool operator<(const Float32& v1, const Float32& v2) {
61   bool result;
62   asm("ucomiss %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
63   return result;
64 }
65 
66 inline bool operator<(const Float64& v1, const Float64& v2) {
67   bool result;
68   asm("ucomisd %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
69   return result;
70 }
71 
72 inline bool operator>(const Float32& v1, const Float32& v2) {
73   bool result;
74   asm("ucomiss %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
75   return result;
76 }
77 
78 inline bool operator>(const Float64& v1, const Float64& v2) {
79   bool result;
80   asm("ucomisd %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
81   return result;
82 }
83 
84 inline bool operator<=(const Float32& v1, const Float32& v2) {
85   bool result;
86   asm("ucomiss %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
87   return result;
88 }
89 
90 inline bool operator<=(const Float64& v1, const Float64& v2) {
91   bool result;
92   asm("ucomisd %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
93   return result;
94 }
95 
96 inline bool operator>=(const Float32& v1, const Float32& v2) {
97   bool result;
98   asm("ucomiss %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
99   return result;
100 }
101 
102 inline bool operator>=(const Float64& v1, const Float64& v2) {
103   bool result;
104   asm("ucomisd %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
105   return result;
106 }
107 
108 inline bool operator==(const Float32& v1, const Float32& v2) {
109   float result;
110   asm("cmpeqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
111   return bit_cast<uint32_t, float>(result) & 0x1;
112 }
113 
114 inline bool operator==(const Float64& v1, const Float64& v2) {
115   double result;
116   asm("cmpeqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
117   return bit_cast<uint64_t, double>(result) & 0x1;
118 }
119 
120 inline bool operator!=(const Float32& v1, const Float32& v2) {
121   float result;
122   asm("cmpneqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
123   return bit_cast<uint32_t, float>(result) & 0x1;
124 }
125 
126 inline bool operator!=(const Float64& v1, const Float64& v2) {
127   double result;
128   asm("cmpneqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
129   return bit_cast<uint64_t, double>(result) & 0x1;
130 }
131 
// It's NOT safe to use ANY functions which return float or double.  That's because the IA32 ABI
// returns floating-point values on the x87 stack (and does that even with -mfpmath=sse), and NaN
// float and double values would be corrupted if pushed onto it.
135 
// Returns `v` with its sign bit flipped.  The flip is done with a bitwise XOR in an SSE
// register, so it applies to every bit pattern alike.
inline Float32 Negative(const Float32& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float32 negated;
  // Only bit 31 (the single-precision sign bit) is set in the mask.
  uint64_t sign_mask = 0x80000000U;
  asm("pxor %[mask], %[acc]"
      : [acc] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
143 
// Returns `v` with its sign bit flipped.  The flip is done with a bitwise XOR in an SSE
// register, so it applies to every bit pattern alike.
inline Float64 Negative(const Float64& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  Float64 negated;
  // Only bit 63 (the double-precision sign bit) is set in the mask.
  uint64_t sign_mask = 0x8000000000000000ULL;
  asm("pxor %[mask], %[acc]"
      : [acc] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
151 
152 template <typename FloatType>
FPRoundTiesAway(WrappedFloatType<FloatType> value)153 inline WrappedFloatType<FloatType> FPRoundTiesAway(WrappedFloatType<FloatType> value) {
154   // Since x86 does not support this rounding mode exactly, we must manually handle the
155   // tie-aways (from ±x.5).
156   WrappedFloatType<FloatType> value_rounded_up = FPRound(value, FE_UPWARD);
157   // Check if value has fraction of exactly 0.5.
158   // Note that this check can produce spurious true and/or false results for numbers that are too
159   // large to have fraction parts. We don't care because for such numbers all three possible FPRound
160   // calls above and below produce the exact same result (which is the same as original value).
161   if (value == value_rounded_up - WrappedFloatType<FloatType>{0.5f}) {
162     if (SignBit(value)) {
163       // If value is negative then FE_TIESAWAY acts as FE_DOWNWARD.
164       return FPRound(value, FE_DOWNWARD);
165     } else {
166       // If value is negative then FE_TIESAWAY acts as FE_UPWARD.
167       return value_rounded_up;
168     }
169   }
170   // Otherwise FE_TIESAWAY acts as FE_TONEAREST.
171   return FPRound(value, FE_TONEAREST);
172 }
173 
// Rounds a Float32 to an integral value according to `round_control`.
//
// The roundss immediate selects the rounding behaviour: 0 = to nearest (ties to even), 1 = down,
// 2 = up, 3 = toward zero, 4 = use the rounding mode currently set in MXCSR.  FE_TIESAWAY has no
// hardware equivalent and is delegated to FPRoundTiesAway.  Any other value is reported through
// LOG_ALWAYS_FATAL.
inline Float32 FPRound(const Float32& value, int round_control) {
  Float32 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      asm("roundss $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TONEAREST:
      asm("roundss $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_DOWNWARD:
      asm("roundss $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_UPWARD:
      asm("roundss $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TOWARDZERO:
      asm("roundss $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TIESAWAY:
      rounded = FPRoundTiesAway(value);
      return rounded;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      // Keeps the result defined should the fatal log ever return.
      rounded.value_ = 0.f;
      return rounded;
  }
}
201 
// Rounds a Float64 to an integral value according to `round_control`.
//
// The roundsd immediate selects the rounding behaviour: 0 = to nearest (ties to even), 1 = down,
// 2 = up, 3 = toward zero, 4 = use the rounding mode currently set in MXCSR.  FE_TIESAWAY has no
// hardware equivalent and is delegated to FPRoundTiesAway.  Any other value is reported through
// LOG_ALWAYS_FATAL.
inline Float64 FPRound(const Float64& value, int round_control) {
  Float64 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      asm("roundsd $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TONEAREST:
      asm("roundsd $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_DOWNWARD:
      asm("roundsd $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_UPWARD:
      asm("roundsd $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TOWARDZERO:
      asm("roundsd $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      return rounded;
    case FE_TIESAWAY:
      rounded = FPRoundTiesAway(value);
      return rounded;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      // Keeps the result defined should the fatal log ever return.
      rounded.value_ = 0.;
      return rounded;
  }
}
229 
230 }  // namespace berberis::intrinsics
231 
232 #endif  // ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
233