1 //===-- Common header for fmod implementations ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H 10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H 11 12 #include "src/__support/CPP/bit.h" 13 #include "src/__support/CPP/limits.h" 14 #include "src/__support/CPP/type_traits.h" 15 #include "src/__support/FPUtil/FEnvImpl.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY 18 19 namespace LIBC_NAMESPACE { 20 namespace fputil { 21 namespace generic { 22 23 // Objective: 24 // The algorithm uses integer arithmetic (max uint64_t) for general case. 25 // Some common cases, like abs(x) < abs(y) or abs(x) < 1000 * abs(y) are 26 // treated specially to increase performance. The part of checking special 27 // cases, numbers NaN, INF etc. treated separately. 28 // 29 // Objective: 30 // 1) FMod definition (https://cplusplus.com/reference/cmath/fmod/): 31 // fmod = numer - tquot * denom, where tquot is the truncated 32 // (i.e., rounded towards zero) result of: numer/denom. 33 // 2) FMod with negative x and/or y can be trivially converted to fmod for 34 // positive x and y. Therefore the algorithm below works only with 35 // positive numbers. 36 // 3) All positive floating point numbers can be represented as m * 2^e, 37 // where "m" is positive integer and "e" is signed. 38 // 4) FMod function can be calculated in integer numbers (x > y): 39 // fmod = m_x * 2^e_x - tquot * m_y * 2^e_y 40 // = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y). 41 // All variables in parentheses are unsigned integers. 42 // 43 // Mathematical background: 44 // Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x 45 // and m_y*2^e_y. This is an ambiguous number representation. For example: 46 // m * 2^e = (2 * m) * 2^(e-1) 47 // The algorithm uses the facts that 48 // r = a % b = (a % (N * b)) % b, 49 // (a * c) % (b * c) = (a % b) * c 50 // where N is positive integer number. a, b and c - positive. Let's adopt 51 // the formula for representation above. 52 // a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k 53 // r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y) 54 // = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y) 55 // r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k) 56 // = (2^p * (m_x % m_y) * 2^(e_y + k - p)) 57 // m_r = 2^p * (m_x % m_y), e_r = m_y + k - p 58 // 59 // Algorithm description: 60 // First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y 61 // are integers (m_x amd m_y positive). 62 // Then the naive implementation of the fmod function with a simple 63 // for/while loop: 64 // while (e_x > e_y) { 65 // m_x *= 2; --e_x; // m_x * 2^e_x == 2 * m_x * 2^(e_x - 1) 66 // m_x %= m_y; 67 // } 68 // On the other hand, the algorithm exploits the fact that m_x, m_y are the 69 // mantissas of floating point numbers, which use less bits than the storage 70 // integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of 71 // the iteration, we can left shift m_x as many bits as the storage integer 72 // type can hold, the exponent reduction per step will be at least 32 - 24 = 8 73 // for floats and 64 - 53 = 11 for doubles (double example below): 74 // while (e_x > e_y) { 75 // m_x <<= 11; e_x -= 11; // m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11) 76 // m_x %= m_y; 77 // } 78 // Some extra improvements are done: 79 // 1) Shift m_y maximum to the right, which can significantly improve 80 // performance for small integer numbers (y = 3 for example). 81 // The m_x shift in the loop can be 62 instead of 11 for double. 82 // 2) For some architectures with very slow division, it can be better to 83 // calculate inverse value ones, and after do multiplication in the loop. 84 // 3) "likely" special cases are treated specially to improve performance. 85 // 86 // Simple example: 87 // The examples below use byte for simplicity. 88 // 1) Shift hy maximum to right without losing bits and increase iy value 89 // m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22. 90 // 2) m_x = m_x % m_y. 91 // 3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x 92 // is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000, 93 // e_x = 100-4 = 96. 94 // 4) Repeat (2) until e_x == e_y. 95 // 96 // Complexity analysis (double): 97 // Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop count: 98 // (m_x - m_y) / (64 - "length of m_y"). 99 // max("length of m_y") = 53, 100 // max(e_x - e_y) = 2048 101 // Maximum operation is 186. For rare "unrealistic" cases. 102 // 103 // Special cases (double): 104 // Supposing that case where |y| > 1e-292 and |x/y|<2000 is very common 105 // special processing is implemented. No m_y alignment, no loop: 106 // result = (m_x * 2^(e_x - e_y)) % m_y. 107 // When x and y are both subnormal (rare case but...) the 108 // result = m_x % m_y. 109 // Simplified conversion back to double. 110 111 // Exceptional cases handler according to cppreference.com 112 // https://en.cppreference.com/w/cpp/numeric/math/fmod 113 // and POSIX standard described in Linux man 114 // https://man7.org/linux/man-pages/man3/fmod.3p.html 115 // C standard for the function is not full, so not by default (although it can 116 // be implemented in another handler. 117 // Signaling NaN converted to quiet NaN with FE_INVALID exception. 118 // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm 119 template <typename T> struct FModDivisionSimpleHelper { executeFModDivisionSimpleHelper120 LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count, 121 T m_x, T m_y) { 122 while (exp_diff > sides_zeroes_count) { 123 exp_diff -= sides_zeroes_count; 124 m_x <<= sides_zeroes_count; 125 m_x %= m_y; 126 } 127 m_x <<= exp_diff; 128 m_x %= m_y; 129 return m_x; 130 } 131 }; 132 133 template <typename T> struct FModDivisionInvMultHelper { executeFModDivisionInvMultHelper134 LIBC_INLINE constexpr static T execute(int exp_diff, int sides_zeroes_count, 135 T m_x, T m_y) { 136 constexpr int LENGTH = sizeof(T) * CHAR_BIT; 137 if (exp_diff > sides_zeroes_count) { 138 T inv_hy = (cpp::numeric_limits<T>::max() / m_y); 139 while (exp_diff > sides_zeroes_count) { 140 exp_diff -= sides_zeroes_count; 141 T hd = (m_x * inv_hy) >> (LENGTH - sides_zeroes_count); 142 m_x <<= sides_zeroes_count; 143 m_x -= hd * m_y; 144 while (LIBC_UNLIKELY(m_x > m_y)) 145 m_x -= m_y; 146 } 147 T hd = (m_x * inv_hy) >> (LENGTH - exp_diff); 148 m_x <<= exp_diff; 149 m_x -= hd * m_y; 150 while (LIBC_UNLIKELY(m_x > m_y)) 151 m_x -= m_y; 152 } else { 153 m_x <<= exp_diff; 154 m_x %= m_y; 155 } 156 return m_x; 157 } 158 }; 159 160 template <typename T, typename U = typename FPBits<T>::StorageType, 161 typename DivisionHelper = FModDivisionSimpleHelper<U>> 162 class FMod { 163 static_assert(cpp::is_floating_point_v<T> && cpp::is_unsigned_v<U> && 164 (sizeof(U) * CHAR_BIT > FPBits<T>::FRACTION_LEN), 165 "FMod instantiated with invalid type."); 166 167 private: 168 using FPB = FPBits<T>; 169 using StorageType = typename FPB::StorageType; 170 pre_check(T x,T y,T & out)171 LIBC_INLINE static bool pre_check(T x, T y, T &out) { 172 using FPB = fputil::FPBits<T>; 173 const T quiet_nan = FPB::quiet_nan().get_val(); 174 FPB sx(x), sy(y); 175 if (LIBC_LIKELY(!sy.is_zero() && !sy.is_inf_or_nan() && 176 !sx.is_inf_or_nan())) 177 return false; 178 179 if (sx.is_nan() || sy.is_nan()) { 180 if (sx.is_signaling_nan() || sy.is_signaling_nan()) 181 fputil::raise_except_if_required(FE_INVALID); 182 out = quiet_nan; 183 return true; 184 } 185 186 if (sx.is_inf() || sy.is_zero()) { 187 fputil::raise_except_if_required(FE_INVALID); 188 fputil::set_errno_if_required(EDOM); 189 out = quiet_nan; 190 return true; 191 } 192 193 out = x; 194 return true; 195 } 196 eval_internal(FPB sx,FPB sy)197 LIBC_INLINE static constexpr FPB eval_internal(FPB sx, FPB sy) { 198 199 if (LIBC_LIKELY(sx.uintval() <= sy.uintval())) { 200 if (sx.uintval() < sy.uintval()) 201 return sx; // |x|<|y| return x 202 return FPB::zero(); // |x|=|y| return 0.0 203 } 204 205 int e_x = sx.get_biased_exponent(); 206 int e_y = sy.get_biased_exponent(); 207 208 // Most common case where |y| is "very normal" and |x/y| < 2^EXP_LEN 209 if (LIBC_LIKELY(e_y > int(FPB::FRACTION_LEN) && 210 e_x - e_y <= int(FPB::EXP_LEN))) { 211 StorageType m_x = sx.get_explicit_mantissa(); 212 StorageType m_y = sy.get_explicit_mantissa(); 213 StorageType d = (e_x == e_y) 214 ? (m_x - m_y) 215 : static_cast<StorageType>(m_x << (e_x - e_y)) % m_y; 216 if (d == 0) 217 return FPB::zero(); 218 // iy - 1 because of "zero power" for number with power 1 219 return FPB::make_value(d, e_y - 1); 220 } 221 // Both subnormal special case. 222 if (LIBC_UNLIKELY(e_x == 0 && e_y == 0)) { 223 FPB d; 224 d.set_mantissa(sx.uintval() % sy.uintval()); 225 return d; 226 } 227 228 // Note that hx is not subnormal by conditions above. 229 U m_x = static_cast<U>(sx.get_explicit_mantissa()); 230 e_x--; 231 232 U m_y = static_cast<U>(sy.get_explicit_mantissa()); 233 constexpr int DEFAULT_LEAD_ZEROS = 234 sizeof(U) * CHAR_BIT - FPB::FRACTION_LEN - 1; 235 int lead_zeros_m_y = DEFAULT_LEAD_ZEROS; 236 if (LIBC_LIKELY(e_y > 0)) { 237 e_y--; 238 } else { 239 m_y = static_cast<U>(sy.get_mantissa()); 240 lead_zeros_m_y = cpp::countl_zero(m_y); 241 } 242 243 // Assume hy != 0 244 int tail_zeros_m_y = cpp::countr_zero(m_y); 245 int sides_zeroes_count = lead_zeros_m_y + tail_zeros_m_y; 246 // n > 0 by conditions above 247 int exp_diff = e_x - e_y; 248 { 249 // Shift hy right until the end or n = 0 250 int right_shift = exp_diff < tail_zeros_m_y ? exp_diff : tail_zeros_m_y; 251 m_y >>= right_shift; 252 exp_diff -= right_shift; 253 e_y += right_shift; 254 } 255 256 { 257 // Shift hx left until the end or n = 0 258 int left_shift = 259 exp_diff < DEFAULT_LEAD_ZEROS ? exp_diff : DEFAULT_LEAD_ZEROS; 260 m_x <<= left_shift; 261 exp_diff -= left_shift; 262 } 263 264 m_x %= m_y; 265 if (LIBC_UNLIKELY(m_x == 0)) 266 return FPB::zero(); 267 268 if (exp_diff == 0) 269 return FPB::make_value(static_cast<StorageType>(m_x), e_y); 270 271 // hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0 272 m_x = DivisionHelper::execute(exp_diff, sides_zeroes_count, m_x, m_y); 273 return FPB::make_value(static_cast<StorageType>(m_x), e_y); 274 } 275 276 public: eval(T x,T y)277 LIBC_INLINE static T eval(T x, T y) { 278 if (T out; LIBC_UNLIKELY(pre_check(x, y, out))) 279 return out; 280 FPB sx(x), sy(y); 281 Sign sign = sx.sign(); 282 sx.set_sign(Sign::POS); 283 sy.set_sign(Sign::POS); 284 FPB result = eval_internal(sx, sy); 285 result.set_sign(sign); 286 return result.get_val(); 287 } 288 }; 289 290 } // namespace generic 291 } // namespace fputil 292 } // namespace LIBC_NAMESPACE 293 294 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMOD_H 295