• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Single-precision erfc(x) function.
3  *
4  * Copyright (c) 2019-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #include "erfcf.h"
9 #include "math_config.h"
10 #include "pl_sig.h"
11 #include "pl_test.h"
12 
13 #define P(i) __erfcf_poly_data.poly[i]
14 
15 /* Approximation of erfcf for |x| > 4.0.  */
16 static inline float
approx_erfcf_hi(float x,uint32_t sign,const double * coeff)17 approx_erfcf_hi (float x, uint32_t sign, const double *coeff)
18 {
19   if (sign)
20     {
21       return 2.0f;
22     }
23 
24   /* Polynomial contribution.  */
25   double z = (double) fabs (x);
26   float p = (float) eval_poly (z, coeff);
27   /* Gaussian contribution.  */
28   float e_mx2 = (float) eval_exp_mx2 (z);
29 
30   return p * e_mx2;
31 }
32 
33 /* Approximation of erfcf for |x| < 4.0.  */
34 static inline float
approx_erfcf_lo(float x,uint32_t sign,const double * coeff)35 approx_erfcf_lo (float x, uint32_t sign, const double *coeff)
36 {
37   /* Polynomial contribution.  */
38   double z = (double) fabs (x);
39   float p = (float) eval_poly (z, coeff);
40   /* Gaussian contribution.  */
41   float e_mx2 = (float) eval_exp_mx2 (z);
42 
43   if (sign)
44     return fmaf (-p, e_mx2, 2.0f);
45   else
46     return p * e_mx2;
47 }
48 
49 /* Top 12 bits of a float (sign and exponent bits).  */
50 static inline uint32_t
abstop12(float x)51 abstop12 (float x)
52 {
53   return (asuint (x) >> 20) & 0x7ff;
54 }
55 
56 /* Top 12 bits of a float.  */
57 static inline uint32_t
top12(float x)58 top12 (float x)
59 {
60   return asuint (x) >> 20;
61 }
62 
63 /* Fast erfcf approximation using polynomial approximation
64    multiplied by gaussian.
65    Most of the computation is carried out in double precision,
66    and is very sensitive to accuracy of polynomial and exp
67    evaluation.
68    Worst-case error is 1.968ulps, obtained for x = 2.0412941.
69    erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp
70    err 1.46788.  */
71 float
erfcf(float x)72 erfcf (float x)
73 {
74   /* Get top words and sign.  */
75   uint32_t ix = asuint (x); /* We need to compare at most 32 bits.  */
76   uint32_t sign = ix >> 31;
77   uint32_t ia12 = top12 (x) & 0x7ff;
78 
79   /* Handle special cases and small values with a single comparison:
80        abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
81 
82      Special cases
83        erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2
84 
85      Errno
86        EDOM does not have to be set in case of erfcf(nan).
87        Only ERANGE may be set in case of underflow.
88 
89      Small values (|x|<small)
90        |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328).  */
91   if (unlikely (abstop12 (x) - 0x328 >= (abstop12 (INFINITY) & 0x7f8) - 0x328))
92     {
93       if (abstop12 (x) >= 0x7f8)
94 	return (float) (sign << 1) + 1.0f / x; /* Special cases.  */
95       else
96 	return 1.0f - x; /* Small case.  */
97     }
98 
99   /* Normalized numbers divided in 4 intervals
100      with bounds: 2.0, 4.0, 8.0 and 10.0. 10 was chosen as the upper bound for
101      the interesting region as it is the smallest value, representable as a
102      12-bit integer, for which returning 0 gives <1.5 ULP.  */
103   if (ia12 < 0x400)
104     {
105       return approx_erfcf_lo (x, sign, P (0));
106     }
107   if (ia12 < 0x408)
108     {
109       return approx_erfcf_lo (x, sign, P (1));
110     }
111   if (ia12 < 0x410)
112     {
113       return approx_erfcf_hi (x, sign, P (2));
114     }
115   if (ia12 < 0x412)
116     {
117       return approx_erfcf_hi (x, sign, P (3));
118     }
119   if (sign)
120     {
121       return 2.0f;
122     }
123   return __math_uflowf (0);
124 }
125 
126 PL_SIG (S, F, 1, erfc, -4.0, 10.0)
127 PL_TEST_ULP (erfcf, 1.5)
128 PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000)
129 PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000)
130 PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000)
131 PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000)
132 PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000)
133 PL_TEST_INTERVAL (erfcf, 0, inf, 40000)
134