• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Single-precision erf(x) function.
3  *
4  * Copyright (c) 2020-2023, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 #include "estrinf.h"
8 #include "hornerf.h"
9 #include "math_config.h"
10 #include "pl_sig.h"
11 #include "pl_test.h"
12 
13 #define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
14 #define A __erff_data.erff_poly_A
15 #define B __erff_data.erff_poly_B
16 
17 /* Top 12 bits of a float.  */
18 static inline uint32_t
top12(float x)19 top12 (float x)
20 {
21   return asuint (x) >> 20;
22 }
23 
24 /* Efficient implementation of erff using either a pure polynomial approximation
25    or the exponential of a polynomial. Worst-case error is 1.09ulps at
26    0x1.c111acp-1.  */
27 float
erff(float x)28 erff (float x)
29 {
30   float r, x2;
31 
32   /* Get top word.  */
33   uint32_t ix = asuint (x);
34   uint32_t sign = ix >> 31;
35   uint32_t ia12 = top12 (x) & 0x7ff;
36 
37   /* Limit of both intervals is 0.875 for performance reasons but coefficients
38      computed on [0.0, 0.921875] and [0.921875, 4.0], which brought accuracy
39      from 0.94 to 1.1ulps.  */
40   if (ia12 < 0x3f6)
41     { /* a = |x| < 0.875.  */
42 
43       /* Tiny and subnormal cases.  */
44       if (unlikely (ia12 < 0x318))
45 	{ /* |x| < 2^(-28).  */
46 	  if (unlikely (ia12 < 0x040))
47 	    { /* |x| < 2^(-119).  */
48 	      float y = fmaf (TwoOverSqrtPiMinusOne, x, x);
49 	      return check_uflowf (y);
50 	    }
51 	  return x + TwoOverSqrtPiMinusOne * x;
52 	}
53 
54       x2 = x * x;
55 
56       /* Normalized cases (|x| < 0.921875) - Use Horner scheme for x+x*P(x^2).
57        */
58 #define C(i) A[i]
59       r = fmaf (HORNER_5 (x2, C), x, x);
60 #undef C
61     }
62   else if (ia12 < 0x408)
63     { /* |x| < 4.0 - Use a custom Estrin scheme.  */
64 
65       float a = fabsf (x);
66       /* Use Estrin scheme on high order (small magnitude) coefficients.  */
67 #define C(i) B[i]
68       r = ESTRIN_3_ (a, x * x, C, 3);
69 #undef C
70       /* Then switch to pure Horner scheme.  */
71       r = fmaf (r, a, B[2]);
72       r = fmaf (r, a, B[1]);
73       r = fmaf (r, a, B[0]);
74       r = fmaf (r, a, a);
75       /* Single precision exponential with ~0.5ulps ensures erff has maximum
76 	 relative error below 1ulp on [0.921875, 4.0] and below 1.1ulps on
77 	 [0.875, 4.0].  */
78       r = expf (-r);
79       /* Explicit copysign (calling copysignf increases latency).  */
80       if (sign)
81 	r = -1.0f + r;
82       else
83 	r = 1.0f - r;
84     }
85   else
86     { /* |x| >= 4.0.  */
87 
88       /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1.  */
89       if (unlikely (ia12 >= 0x7f8))
90 	return (1.f - (float) ((ix >> 31) << 1)) + 1.f / x;
91 
92       /* Explicit copysign (calling copysignf increases latency).  */
93       if (sign)
94 	r = -1.0f;
95       else
96 	r = 1.0f;
97     }
98   return r;
99 }
100 
101 PL_SIG (S, F, 1, erf, -4.0, 4.0)
102 PL_TEST_ULP (erff, 0.6)
103 PL_TEST_INTERVAL (erff, 0, 0xffff0000, 10000)
104 PL_TEST_INTERVAL (erff, 0x1p-127, 0x1p-26, 40000)
105 PL_TEST_INTERVAL (erff, -0x1p-127, -0x1p-26, 40000)
106 PL_TEST_INTERVAL (erff, 0x1p-26, 0x1p3, 40000)
107 PL_TEST_INTERVAL (erff, -0x1p-26, -0x1p3, 40000)
108 PL_TEST_INTERVAL (erff, 0, inf, 40000)
109