• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Double-precision polynomial evaluation function for SVE atan(x) and
3  * atan2(y,x).
4  *
5  * Copyright (c) 2021-2023, Arm Limited.
6  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7  */
8 
9 #include "math_config.h"
10 #include "sv_math.h"
11 
12 #define P(i) sv_f64 (__atan_poly_data.poly[i])
13 
14 /* Polynomial used in fast SVE atan(x) and atan2(y,x) implementations
15    The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).  */
16 static inline sv_f64_t
__sv_atan_common(svbool_t pg,svbool_t red,sv_f64_t z,sv_f64_t az,sv_f64_t shift)17 __sv_atan_common (svbool_t pg, svbool_t red, sv_f64_t z, sv_f64_t az,
18 		  sv_f64_t shift)
19 {
20   /* Use full Estrin scheme for P(z^2) with deg(P)=19.  */
21   sv_f64_t z2 = svmul_f64_x (pg, z, z);
22 
23   /* Level 1.  */
24   sv_f64_t P_1_0 = sv_fma_f64_x (pg, P (1), z2, P (0));
25   sv_f64_t P_3_2 = sv_fma_f64_x (pg, P (3), z2, P (2));
26   sv_f64_t P_5_4 = sv_fma_f64_x (pg, P (5), z2, P (4));
27   sv_f64_t P_7_6 = sv_fma_f64_x (pg, P (7), z2, P (6));
28   sv_f64_t P_9_8 = sv_fma_f64_x (pg, P (9), z2, P (8));
29   sv_f64_t P_11_10 = sv_fma_f64_x (pg, P (11), z2, P (10));
30   sv_f64_t P_13_12 = sv_fma_f64_x (pg, P (13), z2, P (12));
31   sv_f64_t P_15_14 = sv_fma_f64_x (pg, P (15), z2, P (14));
32   sv_f64_t P_17_16 = sv_fma_f64_x (pg, P (17), z2, P (16));
33   sv_f64_t P_19_18 = sv_fma_f64_x (pg, P (19), z2, P (18));
34 
35   /* Level 2.  */
36   sv_f64_t x2 = svmul_f64_x (pg, z2, z2);
37   sv_f64_t P_3_0 = sv_fma_f64_x (pg, P_3_2, x2, P_1_0);
38   sv_f64_t P_7_4 = sv_fma_f64_x (pg, P_7_6, x2, P_5_4);
39   sv_f64_t P_11_8 = sv_fma_f64_x (pg, P_11_10, x2, P_9_8);
40   sv_f64_t P_15_12 = sv_fma_f64_x (pg, P_15_14, x2, P_13_12);
41   sv_f64_t P_19_16 = sv_fma_f64_x (pg, P_19_18, x2, P_17_16);
42 
43   /* Level 3.  */
44   sv_f64_t x4 = svmul_f64_x (pg, x2, x2);
45   sv_f64_t P_7_0 = sv_fma_f64_x (pg, P_7_4, x4, P_3_0);
46   sv_f64_t P_15_8 = sv_fma_f64_x (pg, P_15_12, x4, P_11_8);
47 
48   /* Level 4.  */
49   sv_f64_t x8 = svmul_f64_x (pg, x4, x4);
50   sv_f64_t y = sv_fma_f64_x (pg, P_19_16, x8, P_15_8);
51   y = sv_fma_f64_x (pg, y, x8, P_7_0);
52 
53   /* Finalize. y = shift + z + z^3 * P(z^2).  */
54   sv_f64_t z3 = svmul_f64_x (pg, z2, az);
55   y = sv_fma_f64_x (pg, y, z3, az);
56 
57   /* Apply shift as indicated by `red` predicate.  */
58   y = svadd_f64_m (red, y, shift);
59 
60   return y;
61 }
62