1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
18 #define BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
19
20 #include <cmath>
21 #include <limits>
22
23 #include "berberis/base/bit_util.h"
24 #include "berberis/intrinsics/common/intrinsics_float.h"
25 #include "berberis/intrinsics/common_to_x86/intrinsics_float.h"
26 #include "berberis/intrinsics/guest_cpu_flags.h" // ToHostRoundingMode
27 #include "berberis/intrinsics/guest_rounding_modes.h" // ScopedRoundingMode
28 #include "berberis/intrinsics/type_traits.h"
29
30 namespace berberis::intrinsics {
31
32 // x86 architecture doesn't support RMM (aka FE_TIESAWAY), but it can be easily emulated since it
33 // have support for 80bit floats: if calculations are done with one bit (or more) of extra precision
34 // in the FE_TOWARDZERO mode then we can easily adjust fraction part and would only need to remember
35 // this addition may overflow.
36 template <typename FloatType, typename OperationType, typename... Args>
ExecuteFloatOperationRmm(OperationType operation,Args...args)37 inline FloatType ExecuteFloatOperationRmm(OperationType operation, Args... args) {
38 using Wide = typename TypeTraits<FloatType>::Wide;
39 Wide wide_result = operation(static_cast<typename TypeTraits<Args>::Wide>(args)...);
40 if constexpr (std::is_same_v<FloatType, Float32>) {
41 // In the 32bit->64bit case everything happens almost automatically, we just need to clear low
42 // bits to ensure that we are getting ±∞ and not NaN.
43 auto int_result = bit_cast<std::make_unsigned_t<typename TypeTraits<Wide>::Int>>(wide_result);
44 if ((int_result & 0x7ff0'0000'0000'0000) == 0x7ff0'0000'0000'0000) {
45 return FloatType(wide_result);
46 }
47 int_result += 0x0000'0000'1000'0000;
48 int_result &= 0xffff'ffff'e000'0000;
49 wide_result = bit_cast<Wide>(int_result);
50 } else if constexpr (std::is_same_v<FloatType, Float64>) {
51 // In 64bit->80bit case we need to adjust significand bits to ensure we are creating ±∞ and not
52 // pseudo-infinity (supported on 8087/80287, but not on modern CPUs).
53 struct {
54 uint64_t significand;
55 uint16_t exponent;
56 uint8_t padding[sizeof(Wide) - sizeof(uint64_t) - sizeof(uint16_t)];
57 } fp80_parts;
58 static_assert(sizeof fp80_parts == sizeof(Wide));
59 memcpy(&fp80_parts, &wide_result, sizeof(wide_result));
60 // Don't try to round ±∞, NaNs and ±0 (denormals are not supported by RISC-V).
61 if ((fp80_parts.exponent & 0x7fff) == 0x7fff ||
62 (fp80_parts.significand & 0x8000'0000'0000'0000) == 0) {
63 return FloatType(wide_result);
64 }
65 fp80_parts.significand += 0x0000'0000'0000'0400;
66 fp80_parts.significand &= 0xffff'ffff'ffff'f800;
67 if (fp80_parts.significand == 0) {
68 fp80_parts.exponent++;
69 fp80_parts.significand = 0x8000'0000'0000'0000;
70 }
71 memcpy(&wide_result, &fp80_parts, sizeof(wide_result));
72 }
73 return FloatType(wide_result);
74 }
75
76 // Note: first round of rm/frm verification must happen before that function because RISC-V
77 // postulates that invalid rm or frm should trigger illegal instruction exception.
78 // Here we can assume both rm and frm fields are valid.
79 template <typename FloatType, typename OperationType, typename... Args>
ExecuteFloatOperation(uint8_t requested_rm,uint8_t current_rm,OperationType operation,Args...args)80 inline FloatType ExecuteFloatOperation(uint8_t requested_rm,
81 uint8_t current_rm,
82 OperationType operation,
83 Args... args) {
84 int host_requested_rm = ToHostRoundingMode(requested_rm);
85 int host_current_rm = ToHostRoundingMode(current_rm);
86 if (requested_rm == FPFlags::DYN || host_requested_rm == host_current_rm) {
87 uint8_t rm = requested_rm == FPFlags::DYN ? current_rm : requested_rm;
88 if (rm == FPFlags::RMM) {
89 return ExecuteFloatOperationRmm<FloatType>(operation, args...);
90 }
91 return operation(args...);
92 }
93 ScopedRoundingMode scoped_rounding_mode{host_requested_rm};
94 if (requested_rm == FPFlags::RMM) {
95 return ExecuteFloatOperationRmm<FloatType>(operation, args...);
96 }
97 return operation(args...);
98 }
99
100 // From RISC-V ISA manual: Single-Precision Floating-Point Computational Instructions.
101 // Covers behavior for both single and double precision floating point comparisons.
102 #define DEFINE_FLOAT_COMPARE_FUNC(FuncName, FloatType, ZeroVal, Intrinsic) \
103 inline FloatType FuncName(FloatType op1, FloatType op2) { \
104 FPInfo op1_class = FPClassify(op1); \
105 FPInfo op2_class = FPClassify(op2); \
106 if (op1_class == FPInfo::kZero && op2_class == FPInfo::kZero && \
107 SignBit(op1) != SignBit(op2)) { \
108 return FloatType(ZeroVal); \
109 } \
110 /* If both inputs are NaNs, the result is the canonical NaN. */ \
111 if (op1_class == FPInfo::kNaN && op2_class == FPInfo::kNaN) { \
112 return std::numeric_limits<FloatType>::quiet_NaN(); \
113 } \
114 /* If only one operand is a NaN, the result is the non-NaN operand. */ \
115 if (op1_class == FPInfo::kNaN) { \
116 return op2; \
117 } \
118 if (op2_class == FPInfo::kNaN) { \
119 return op1; \
120 } \
121 return FloatType(Intrinsic(op1.value_, op2.value_)); \
122 }
123 DEFINE_FLOAT_COMPARE_FUNC(Max, Float32, +0.f, std::fmax);
124 DEFINE_FLOAT_COMPARE_FUNC(Max, Float64, +0.f, std::fmax);
125 DEFINE_FLOAT_COMPARE_FUNC(Min, Float32, -0.f, std::fmin);
126 DEFINE_FLOAT_COMPARE_FUNC(Min, Float64, -0.f, std::fmin);
127 #undef DEFINE_FLOAT_COMPARE_FUNC
128
129 // We only need Negative(long double) for FMA, b/120563432 doesn't affect this function.
Negative(const long double & v)130 inline long double Negative(const long double& v) {
131 return -v;
132 }
133
Sqrt(const long double & v)134 inline long double Sqrt(const long double& v) {
135 return sqrt(v);
136 }
137
MulAdd(const long double & v1,const long double & v2,const long double & v3)138 inline long double MulAdd(const long double& v1, const long double& v2, const long double& v3) {
139 return fma(v1, v2, v3);
140 }
141
142 } // namespace berberis::intrinsics
143
144 #endif // BERBERIS_INTRINSICS_RISCV64_TO_X86_64_INTRINSICS_FLOAT_H_
145