• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Double-precision erfc(x) function.
3  *
4  * Copyright (c) 2019-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "math_config.h"
9 #include "pairwise_horner.h"
10 #include "pl_sig.h"
11 #include "pl_test.h"
12 
13 #define AbsMask (0x7fffffffffffffff)
14 
15 #define xint __erfc_data.interval_bounds
16 #define PX __erfc_data.poly
17 
18 /* Accurate exponential from optimized routines.  */
19 double
20 __exp_dd (double x, double xtail);
21 
22 static inline double
eval_poly_horner(double z,int i)23 eval_poly_horner (double z, int i)
24 {
25   double z2 = z * z;
26 #define C(j) PX[i][j]
27   return PAIRWISE_HORNER_12 (z, z2, C);
28 }
29 
30 /* Accurate evaluation of exp(x^2)
31    using compensated product (x^2 ~ x*x + e2)
32    and the __exp_dd(y,d) routine, that is the
33    computation of exp(y+d) with a small correction d<<y.  */
34 static inline double
eval_accurate_gaussian(double a)35 eval_accurate_gaussian (double a)
36 {
37   double e2;
38   double a2 = a * a;
39   double aa1 = -fma (0x1.0000002p27, a, -a);
40   aa1 = fma (0x1.0000002p27, a, aa1);
41   double aa2 = a - aa1;
42   e2 = fma (-aa1, aa1, a2);
43   e2 = fma (-aa1, aa2, e2);
44   e2 = fma (-aa2, aa1, e2);
45   e2 = fma (-aa2, aa2, e2);
46   return __exp_dd (-a2, e2);
47 }
48 
49 /* Approximation of erfc for |x| > 6.0.  */
50 static inline double
approx_erfc_hi(double x,int i)51 approx_erfc_hi (double x, int i)
52 {
53   double a = fabs (x);
54   double z = a - xint[i];
55   double p = eval_poly_horner (z, i);
56   double e_mx2 = eval_accurate_gaussian (a);
57   return p * e_mx2;
58 }
59 
60 static inline int
get_itv_idx(double x)61 get_itv_idx (double x)
62 {
63   /* Interval bounds are a logarithmic scale, i.e. interval n has
64      lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain
65      the interval index.  */
66   double a = asdouble (asuint64 (x) & AbsMask);
67   double z = a + 1.0;
68   z = z * z;
69   z = z * z;
70   return (asuint64 (z) >> 52) - 1023;
71 }
72 
73 /* Approximation of erfc for |x| < 6.0.  */
74 static inline double
approx_erfc_lo(double x,uint32_t sign,int i)75 approx_erfc_lo (double x, uint32_t sign, int i)
76 {
77   double a = fabs (x);
78   double z = a - xint[i];
79   double p = eval_poly_horner (z, i);
80   double e_mx2 = eval_accurate_gaussian (a);
81   if (sign)
82     return fma (-p, e_mx2, 2.0);
83   else
84     return p * e_mx2;
85 }
86 
87 /* Top 12 bits of a double (sign and exponent bits).  */
88 static inline uint32_t
abstop12(double x)89 abstop12 (double x)
90 {
91   return (asuint64 (x) >> 52) & 0x7ff;
92 }
93 
94 /* Top 32 bits of a double.  */
95 static inline uint32_t
top32(double x)96 top32 (double x)
97 {
98   return asuint64 (x) >> 32;
99 }
100 
101 /* Fast erfc implementation.
102    The approximation uses polynomial approximation of
103    exp(x^2) * erfc(x) with fixed orders on 20 intervals.
104    Maximum measured error is 4.05 ULPs:.
105    erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2
106 			     want 0x1.ff84036f8f0b7p-2.  */
107 double
erfc(double x)108 erfc (double x)
109 {
110   /* Get top words.  */
111   uint32_t ix = top32 (x); /* We need to compare at most 32 bits.  */
112   uint32_t ia = ix & 0x7fffffff;
113   uint32_t sign = ix >> 31;
114 
115   /* Handle special cases and small values with a single comparison:
116      abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
117      Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2
118      Errno EDOM does not have to be set in case of erfc(nan).
119      Only ERANGE may be set in case of underflow.
120      Small values (|x|<small)
121        |x|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-50) = 0x3c7)
122        |x|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd).  */
123   if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd))
124     {
125       if (abstop12 (x) >= 0x7ff)
126 	return (double) (sign << 1) + 1.0 / x; /* special cases.  */
127       else
128 	return 1.0 - x; /* small case.  */
129     }
130   else if (ia < 0x40180000)
131     { /* |x| < 6.0.  */
132       return approx_erfc_lo (x, sign, get_itv_idx (x));
133     }
134   else if (sign)
135     { /* x <= -6.0.  */
136       return 2.0;
137     }
138   else if (ia < 0x403c0000)
139     { /* 6.0 <= x < 28.  */
140       return approx_erfc_hi (x, get_itv_idx (x));
141     }
142   else
143     { /* x > 28.  */
144       return __math_uflow (0);
145     }
146 }
147 
148 PL_SIG (S, D, 1, erfc, -6.0, 28.0)
149 PL_TEST_ULP (erfc, 3.56)
150 PL_TEST_INTERVAL (erfc, 0, 0xffff0000, 10000)
151 PL_TEST_INTERVAL (erfc, 0x1p-1022, 0x1p-26, 40000)
152 PL_TEST_INTERVAL (erfc, -0x1p-1022, -0x1p-26, 40000)
153 PL_TEST_INTERVAL (erfc, 0x1p-26, 0x1p5, 40000)
154 PL_TEST_INTERVAL (erfc, -0x1p-26, -0x1p3, 40000)
155 PL_TEST_INTERVAL (erfc, 0, inf, 40000)
156