1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
18 #define ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
19
20 #include <cmath>
21
22 #include "berberis/base/bit_util.h"
23 #include "berberis/base/logging.h"
24 #include "berberis/intrinsics/common/intrinsics_float.h" // Float32/Float64
25 #include "berberis/intrinsics/guest_rounding_modes.h" // FE_HOSTROUND/FE_TIESAWAY
26
27 namespace berberis::intrinsics {
28
29 #define MAKE_BINARY_OPERATOR(guest_name, operator_name, assignment_name) \
30 \
31 inline Float32 operator operator_name(const Float32& v1, const Float32& v2) { \
32 Float32 result; \
33 asm(#guest_name "ss %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
34 return result; \
35 } \
36 \
37 inline Float32& operator assignment_name(Float32 & v1, const Float32 & v2) { \
38 asm(#guest_name "ss %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_)); \
39 return v1; \
40 } \
41 \
42 inline Float64 operator operator_name(const Float64& v1, const Float64& v2) { \
43 Float64 result; \
44 asm(#guest_name "sd %2,%0" : "=x"(result.value_) : "0"(v1.value_), "x"(v2.value_)); \
45 return result; \
46 } \
47 \
48 inline Float64& operator assignment_name(Float64 & v1, const Float64 & v2) { \
49 asm(#guest_name "sd %2,%0" : "=x"(v1.value_) : "0"(v1.value_), "x"(v2.value_)); \
50 return v1; \
51 }
52
53 MAKE_BINARY_OPERATOR(add, +, +=)
54 MAKE_BINARY_OPERATOR(sub, -, -=)
55 MAKE_BINARY_OPERATOR(mul, *, *=)
56 MAKE_BINARY_OPERATOR(div, /, /=)
57
58 #undef MAKE_BINARY_OPERATOR
59
60 inline bool operator<(const Float32& v1, const Float32& v2) {
61 bool result;
62 asm("ucomiss %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
63 return result;
64 }
65
66 inline bool operator<(const Float64& v1, const Float64& v2) {
67 bool result;
68 asm("ucomisd %1,%2\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
69 return result;
70 }
71
72 inline bool operator>(const Float32& v1, const Float32& v2) {
73 bool result;
74 asm("ucomiss %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
75 return result;
76 }
77
78 inline bool operator>(const Float64& v1, const Float64& v2) {
79 bool result;
80 asm("ucomisd %2,%1\n seta %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
81 return result;
82 }
83
84 inline bool operator<=(const Float32& v1, const Float32& v2) {
85 bool result;
86 asm("ucomiss %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
87 return result;
88 }
89
90 inline bool operator<=(const Float64& v1, const Float64& v2) {
91 bool result;
92 asm("ucomisd %1,%2\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
93 return result;
94 }
95
96 inline bool operator>=(const Float32& v1, const Float32& v2) {
97 bool result;
98 asm("ucomiss %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
99 return result;
100 }
101
102 inline bool operator>=(const Float64& v1, const Float64& v2) {
103 bool result;
104 asm("ucomisd %2,%1\n setnb %0" : "=q"(result) : "x"(v1.value_), "x"(v2.value_) : "cc");
105 return result;
106 }
107
108 inline bool operator==(const Float32& v1, const Float32& v2) {
109 float result;
110 asm("cmpeqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
111 return bit_cast<uint32_t, float>(result) & 0x1;
112 }
113
114 inline bool operator==(const Float64& v1, const Float64& v2) {
115 double result;
116 asm("cmpeqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
117 return bit_cast<uint64_t, double>(result) & 0x1;
118 }
119
120 inline bool operator!=(const Float32& v1, const Float32& v2) {
121 float result;
122 asm("cmpneqss %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
123 return bit_cast<uint32_t, float>(result) & 0x1;
124 }
125
126 inline bool operator!=(const Float64& v1, const Float64& v2) {
127 double result;
128 asm("cmpneqsd %2,%0" : "=x"(result) : "0"(v1.value_), "x"(v2.value_));
129 return bit_cast<uint64_t, double>(result) & 0x1;
130 }
131
// It's NOT safe to use ANY functions which return float or double. That's because the IA32 ABI
// uses the x87 stack to return such values (and does that even with -mfpmath=sse), and NaN float
// and double values would be corrupted if pushed on it.
135
// Returns `v` with its sign bit flipped. This is a pure bit operation, so it applies to zeros,
// infinities and NaNs alike.
inline Float32 Negative(const Float32& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  // Bit 31 is the sign bit of a single-precision value; XOR-ing with this mask toggles it.
  const uint64_t sign_mask = 0x80000000U;
  Float32 negated;
  asm("pxor %[mask], %[dst]"
      : [dst] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
143
// Returns `v` with its sign bit flipped. This is a pure bit operation, so it applies to zeros,
// infinities and NaNs alike.
inline Float64 Negative(const Float64& v) {
  // TODO(b/120563432): Simple -v.value_ doesn't work after a clang update.
  // Bit 63 is the sign bit of a double-precision value; XOR-ing with this mask toggles it.
  const uint64_t sign_mask = 0x8000000000000000ULL;
  Float64 negated;
  asm("pxor %[mask], %[dst]"
      : [dst] "=x"(negated.value_)
      : "0"(v.value_), [mask] "x"(sign_mask));
  return negated;
}
151
152 template <typename FloatType>
FPRoundTiesAway(WrappedFloatType<FloatType> value)153 inline WrappedFloatType<FloatType> FPRoundTiesAway(WrappedFloatType<FloatType> value) {
154 // Since x86 does not support this rounding mode exactly, we must manually handle the
155 // tie-aways (from ±x.5).
156 WrappedFloatType<FloatType> value_rounded_up = FPRound(value, FE_UPWARD);
157 // Check if value has fraction of exactly 0.5.
158 // Note that this check can produce spurious true and/or false results for numbers that are too
159 // large to have fraction parts. We don't care because for such numbers all three possible FPRound
160 // calls above and below produce the exact same result (which is the same as original value).
161 if (value == value_rounded_up - WrappedFloatType<FloatType>{0.5f}) {
162 if (SignBit(value)) {
163 // If value is negative then FE_TIESAWAY acts as FE_DOWNWARD.
164 return FPRound(value, FE_DOWNWARD);
165 } else {
166 // If value is negative then FE_TIESAWAY acts as FE_UPWARD.
167 return value_rounded_up;
168 }
169 }
170 // Otherwise FE_TIESAWAY acts as FE_TONEAREST.
171 return FPRound(value, FE_TONEAREST);
172 }
173
// Rounds `value` to an integral single-precision value using the mode named by `round_control`.
//
// Accepted modes: FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO and
// FE_TIESAWAY. Any other value is reported through LOG_ALWAYS_FATAL.
inline Float32 FPRound(const Float32& value, int round_control) {
  // roundss cannot encode ties-away, so that mode is delegated to the manual implementation.
  if (round_control == FE_TIESAWAY) {
    return FPRoundTiesAway(value);
  }

  Float32 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: take the rounding mode from MXCSR.
      asm("roundss $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TONEAREST:
      // Immediate 0: round to nearest, ties to even.
      asm("roundss $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_DOWNWARD:
      // Immediate 1: round toward negative infinity.
      asm("roundss $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_UPWARD:
      // Immediate 2: round toward positive infinity.
      asm("roundss $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      // Immediate 3: truncate.
      asm("roundss $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      rounded.value_ = 0.f;
      break;
  }
  return rounded;
}
201
// Rounds `value` to an integral double-precision value using the mode named by `round_control`.
//
// Accepted modes: FE_HOSTROUND, FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO and
// FE_TIESAWAY. Any other value is reported through LOG_ALWAYS_FATAL.
inline Float64 FPRound(const Float64& value, int round_control) {
  // roundsd cannot encode ties-away, so that mode is delegated to the manual implementation.
  if (round_control == FE_TIESAWAY) {
    return FPRoundTiesAway(value);
  }

  Float64 rounded;
  switch (round_control) {
    case FE_HOSTROUND:
      // Immediate 4: take the rounding mode from MXCSR.
      asm("roundsd $4,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TONEAREST:
      // Immediate 0: round to nearest, ties to even.
      asm("roundsd $0,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_DOWNWARD:
      // Immediate 1: round toward negative infinity.
      asm("roundsd $1,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_UPWARD:
      // Immediate 2: round toward positive infinity.
      asm("roundsd $2,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    case FE_TOWARDZERO:
      // Immediate 3: truncate.
      asm("roundsd $3,%[src],%[dst]" : [dst] "=x"(rounded.value_) : [src] "x"(value.value_));
      break;
    default:
      LOG_ALWAYS_FATAL("Internal error: unknown round_control in FPRound!");
      rounded.value_ = 0.;
      break;
  }
  return rounded;
}
229
230 } // namespace berberis::intrinsics
231
232 #endif // ALL_TO_X86_32_OR_x86_64_BERBERIS_INTRINSICS_INTRINSICS_FLOAT_H_
233