1 /*
2 * Single-precision SVE powi(x, n) function.
3 *
4 * Copyright (c) 2020-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #include "sv_math.h"
9 #if SV_SUPPORTED
10
11 /* Optimized single-precision vector powi (float base, integer power).
12 powi is developed for environments in which accuracy is of much less
13 importance than performance, hence we provide no estimate for worst-case
14 error. */
15 svfloat32_t
__sv_powif_x(svfloat32_t as,svint32_t ns,svbool_t p)16 __sv_powif_x (svfloat32_t as, svint32_t ns, svbool_t p)
17 {
18 /* Compute powi by successive squaring, right to left. */
19 svfloat32_t acc = svdup_n_f32 (1.f);
20 svbool_t want_recip = svcmplt_n_s32 (p, ns, 0);
21 svuint32_t ns_abs = svreinterpret_u32_s32 (svabs_s32_x (p, ns));
22
23 /* We use a max to avoid needing to check whether any lane != 0 on each
24 iteration. */
25 uint32_t max_n = svmaxv_u32 (p, ns_abs);
26
27 svfloat32_t c = as;
28 /* Successively square c, and use merging predication (_m) to determine
29 whether or not to perform the multiplication or keep the previous
30 iteration. */
31 while (true)
32 {
33 svbool_t px = svcmpeq_n_u32 (p, svand_n_u32_x (p, ns_abs, 1), 1);
34 acc = svmul_f32_m (px, acc, c);
35 max_n >>= 1;
36 if (max_n == 0)
37 break;
38
39 ns_abs = svlsr_n_u32_x (p, ns_abs, 1);
40 c = svmul_f32_x (p, c, c);
41 }
42
43 /* Negative powers are handled by computing the abs(n) version and then
44 taking the reciprocal. */
45 if (svptest_any (want_recip, want_recip))
46 acc = svdivr_n_f32_m (want_recip, acc, 1.0f);
47
48 return acc;
49 }
50
51 /* Note no trailing f for ZGV... name - 64-bit integer version is powk. */
52 strong_alias (__sv_powif_x, _ZGVsMxvv_powi)
53
54 #endif // SV_SUPPORTED
55