1 /*
2 * Single-precision erfc(x) function.
3 *
4 * Copyright (c) 2019-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #include "erfcf.h"
9 #include "math_config.h"
10 #include "pl_sig.h"
11 #include "pl_test.h"
12
13 #define P(i) __erfcf_poly_data.poly[i]
14
15 /* Approximation of erfcf for |x| > 4.0. */
16 static inline float
approx_erfcf_hi(float x,uint32_t sign,const double * coeff)17 approx_erfcf_hi (float x, uint32_t sign, const double *coeff)
18 {
19 if (sign)
20 {
21 return 2.0f;
22 }
23
24 /* Polynomial contribution. */
25 double z = (double) fabs (x);
26 float p = (float) eval_poly (z, coeff);
27 /* Gaussian contribution. */
28 float e_mx2 = (float) eval_exp_mx2 (z);
29
30 return p * e_mx2;
31 }
32
33 /* Approximation of erfcf for |x| < 4.0. */
34 static inline float
approx_erfcf_lo(float x,uint32_t sign,const double * coeff)35 approx_erfcf_lo (float x, uint32_t sign, const double *coeff)
36 {
37 /* Polynomial contribution. */
38 double z = (double) fabs (x);
39 float p = (float) eval_poly (z, coeff);
40 /* Gaussian contribution. */
41 float e_mx2 = (float) eval_exp_mx2 (z);
42
43 if (sign)
44 return fmaf (-p, e_mx2, 2.0f);
45 else
46 return p * e_mx2;
47 }
48
49 /* Top 12 bits of a float (sign and exponent bits). */
50 static inline uint32_t
abstop12(float x)51 abstop12 (float x)
52 {
53 return (asuint (x) >> 20) & 0x7ff;
54 }
55
56 /* Top 12 bits of a float. */
57 static inline uint32_t
top12(float x)58 top12 (float x)
59 {
60 return asuint (x) >> 20;
61 }
62
63 /* Fast erfcf approximation using polynomial approximation
64 multiplied by gaussian.
65 Most of the computation is carried out in double precision,
66 and is very sensitive to accuracy of polynomial and exp
67 evaluation.
68 Worst-case error is 1.968ulps, obtained for x = 2.0412941.
69 erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp
70 err 1.46788. */
71 float
erfcf(float x)72 erfcf (float x)
73 {
74 /* Get top words and sign. */
75 uint32_t ix = asuint (x); /* We need to compare at most 32 bits. */
76 uint32_t sign = ix >> 31;
77 uint32_t ia12 = top12 (x) & 0x7ff;
78
79 /* Handle special cases and small values with a single comparison:
80 abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
81
82 Special cases
83 erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2
84
85 Errno
86 EDOM does not have to be set in case of erfcf(nan).
87 Only ERANGE may be set in case of underflow.
88
89 Small values (|x|<small)
90 |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */
91 if (unlikely (abstop12 (x) - 0x328 >= (abstop12 (INFINITY) & 0x7f8) - 0x328))
92 {
93 if (abstop12 (x) >= 0x7f8)
94 return (float) (sign << 1) + 1.0f / x; /* Special cases. */
95 else
96 return 1.0f - x; /* Small case. */
97 }
98
99 /* Normalized numbers divided in 4 intervals
100 with bounds: 2.0, 4.0, 8.0 and 10.0. 10 was chosen as the upper bound for
101 the interesting region as it is the smallest value, representable as a
102 12-bit integer, for which returning 0 gives <1.5 ULP. */
103 if (ia12 < 0x400)
104 {
105 return approx_erfcf_lo (x, sign, P (0));
106 }
107 if (ia12 < 0x408)
108 {
109 return approx_erfcf_lo (x, sign, P (1));
110 }
111 if (ia12 < 0x410)
112 {
113 return approx_erfcf_hi (x, sign, P (2));
114 }
115 if (ia12 < 0x412)
116 {
117 return approx_erfcf_hi (x, sign, P (3));
118 }
119 if (sign)
120 {
121 return 2.0f;
122 }
123 return __math_uflowf (0);
124 }
125
126 PL_SIG (S, F, 1, erfc, -4.0, 10.0)
127 PL_TEST_ULP (erfcf, 1.5)
128 PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000)
129 PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000)
130 PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000)
131 PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000)
132 PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000)
133 PL_TEST_INTERVAL (erfcf, 0, inf, 40000)
134