Home
last modified time | relevance | path

Searched refs:v_fma_f32 (Results 1 – 25 of 66) sorted by relevance

123

/external/arm-optimized-routines/math/
Dv_expf.c41 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase()
43 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase()
57 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME()
59 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
60 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
65 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
66 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
73 p = v_fma_f32 (C0, r, C1); in V_NAME()
74 q = v_fma_f32 (C2, r, C3); in V_NAME()
75 q = v_fma_f32 (p, r2, q); in V_NAME()
[all …]
Dv_expf_1u.c55 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME()
57 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
58 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
63 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
64 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
70 poly = v_fma_f32 (C0, r, C1); in V_NAME()
71 poly = v_fma_f32 (poly, r, C2); in V_NAME()
72 poly = v_fma_f32 (poly, r, C3); in V_NAME()
73 poly = v_fma_f32 (poly, r, C4); in V_NAME()
74 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
[all …]
Dv_cosf.c51 n = v_fma_f32 (InvPi, r + HalfPi, Shift); in V_NAME()
57 r = v_fma_f32 (-Pi1, n, r); in V_NAME()
58 r = v_fma_f32 (-Pi2, n, r); in V_NAME()
59 r = v_fma_f32 (-Pi3, n, r); in V_NAME()
63 y = v_fma_f32 (A9, r2, A7); in V_NAME()
64 y = v_fma_f32 (y, r2, A5); in V_NAME()
65 y = v_fma_f32 (y, r2, A3); in V_NAME()
66 y = v_fma_f32 (y * r2, r, r); in V_NAME()
Dv_logf.c59 p = v_fma_f32 (P6, r, P5); in V_NAME()
60 q = v_fma_f32 (P4, r, P3); in V_NAME()
61 y = v_fma_f32 (P2, r, P1); in V_NAME()
62 p = v_fma_f32 (P7, r2, p); in V_NAME()
63 q = v_fma_f32 (p, r2, q); in V_NAME()
64 y = v_fma_f32 (q, r2, y); in V_NAME()
65 p = v_fma_f32 (Ln2, n, r); in V_NAME()
66 y = v_fma_f32 (y, r2, p); in V_NAME()
Dv_sinf.c51 n = v_fma_f32 (InvPi, r, Shift); in V_NAME()
56 r = v_fma_f32 (-Pi1, n, r); in V_NAME()
57 r = v_fma_f32 (-Pi2, n, r); in V_NAME()
58 r = v_fma_f32 (-Pi3, n, r); in V_NAME()
62 y = v_fma_f32 (A9, r2, A7); in V_NAME()
63 y = v_fma_f32 (y, r2, A5); in V_NAME()
64 y = v_fma_f32 (y, r2, A3); in V_NAME()
65 y = v_fma_f32 (y * r2, r, r); in V_NAME()
Dv_exp2f.c38 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase()
40 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase()
68 p = v_fma_f32 (C0, r, C1); in V_NAME()
69 q = v_fma_f32 (C2, r, C3); in V_NAME()
70 q = v_fma_f32 (p, r2, q); in V_NAME()
72 poly = v_fma_f32 (q, r2, p); in V_NAME()
75 return v_fma_f32 (poly, scale, scale); in V_NAME()
Dv_exp2f_1u.c65 poly = v_fma_f32 (C0, r, C1); in V_NAME()
66 poly = v_fma_f32 (poly, r, C2); in V_NAME()
67 poly = v_fma_f32 (poly, r, C3); in V_NAME()
68 poly = v_fma_f32 (poly, r, C4); in V_NAME()
69 poly = v_fma_f32 (poly, r, C5); in V_NAME()
70 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
/external/llvm-project/libc/AOR_v20.02/math/
Dv_expf.c42 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase()
44 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase()
58 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME()
60 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
61 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
66 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
67 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
74 p = v_fma_f32 (C0, r, C1); in V_NAME()
75 q = v_fma_f32 (C2, r, C3); in V_NAME()
76 q = v_fma_f32 (p, r2, q); in V_NAME()
[all …]
Dv_expf_1u.c56 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME()
58 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
59 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
64 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME()
65 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME()
71 poly = v_fma_f32 (C0, r, C1); in V_NAME()
72 poly = v_fma_f32 (poly, r, C2); in V_NAME()
73 poly = v_fma_f32 (poly, r, C3); in V_NAME()
74 poly = v_fma_f32 (poly, r, C4); in V_NAME()
75 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
[all …]
Dv_cosf.c52 n = v_fma_f32 (InvPi, r + HalfPi, Shift); in V_NAME()
58 r = v_fma_f32 (-Pi1, n, r); in V_NAME()
59 r = v_fma_f32 (-Pi2, n, r); in V_NAME()
60 r = v_fma_f32 (-Pi3, n, r); in V_NAME()
64 y = v_fma_f32 (A9, r2, A7); in V_NAME()
65 y = v_fma_f32 (y, r2, A5); in V_NAME()
66 y = v_fma_f32 (y, r2, A3); in V_NAME()
67 y = v_fma_f32 (y * r2, r, r); in V_NAME()
Dv_sinf.c52 n = v_fma_f32 (InvPi, r, Shift); in V_NAME()
57 r = v_fma_f32 (-Pi1, n, r); in V_NAME()
58 r = v_fma_f32 (-Pi2, n, r); in V_NAME()
59 r = v_fma_f32 (-Pi3, n, r); in V_NAME()
63 y = v_fma_f32 (A9, r2, A7); in V_NAME()
64 y = v_fma_f32 (y, r2, A5); in V_NAME()
65 y = v_fma_f32 (y, r2, A3); in V_NAME()
66 y = v_fma_f32 (y * r2, r, r); in V_NAME()
Dv_logf.c60 p = v_fma_f32 (P6, r, P5); in V_NAME()
61 q = v_fma_f32 (P4, r, P3); in V_NAME()
62 y = v_fma_f32 (P2, r, P1); in V_NAME()
63 p = v_fma_f32 (P7, r2, p); in V_NAME()
64 q = v_fma_f32 (p, r2, q); in V_NAME()
65 y = v_fma_f32 (q, r2, y); in V_NAME()
66 p = v_fma_f32 (Ln2, n, r); in V_NAME()
67 y = v_fma_f32 (y, r2, p); in V_NAME()
Dv_exp2f.c39 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase()
41 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase()
69 p = v_fma_f32 (C0, r, C1); in V_NAME()
70 q = v_fma_f32 (C2, r, C3); in V_NAME()
71 q = v_fma_f32 (p, r2, q); in V_NAME()
73 poly = v_fma_f32 (q, r2, p); in V_NAME()
76 return v_fma_f32 (poly, scale, scale); in V_NAME()
Dv_exp2f_1u.c66 poly = v_fma_f32 (C0, r, C1); in V_NAME()
67 poly = v_fma_f32 (poly, r, C2); in V_NAME()
68 poly = v_fma_f32 (poly, r, C3); in V_NAME()
69 poly = v_fma_f32 (poly, r, C4); in V_NAME()
70 poly = v_fma_f32 (poly, r, C5); in V_NAME()
71 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
/external/llvm-project/llvm/test/CodeGen/AMDGPU/
Dllvm.powi.ll60 ; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0
61 ; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2
64 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
65 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
66 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
77 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
78 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
80 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
81 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
82 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
[all …]
Dfrem.ll32 ; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0
33 ; SI-NEXT: v_fma_f32 v4, v5, v4, v4
35 ; SI-NEXT: v_fma_f32 v6, -v3, v5, v2
36 ; SI-NEXT: v_fma_f32 v5, v6, v4, v5
37 ; SI-NEXT: v_fma_f32 v2, -v3, v5, v2
42 ; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
73 ; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0
74 ; CI-NEXT: v_fma_f32 v4, v5, v4, v4
76 ; CI-NEXT: v_fma_f32 v6, -v3, v5, v2
77 ; CI-NEXT: v_fma_f32 v5, v6, v4, v5
[all …]
Dstrict_fma.f32.ll8 ; GCN-NEXT: v_fma_f32 v0, v0, v1, v2
18 ; GCN-NEXT: v_fma_f32 v0, v0, v2, v4
19 ; GCN-NEXT: v_fma_f32 v1, v1, v3, v5
29 ; GCN-NEXT: v_fma_f32 v0, v0, v3, v6
30 ; GCN-NEXT: v_fma_f32 v1, v1, v4, v7
31 ; GCN-NEXT: v_fma_f32 v2, v2, v5, v8
41 ; GCN-NEXT: v_fma_f32 v0, v0, v4, v8
42 ; GCN-NEXT: v_fma_f32 v1, v1, v5, v9
43 ; GCN-NEXT: v_fma_f32 v2, v2, v6, v10
44 ; GCN-NEXT: v_fma_f32 v3, v3, v7, v11
[all …]
Dfdiv.ll23 ; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
24 ; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
26 ; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
27 ; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
28 ; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
49 ; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
50 ; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
52 ; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
53 ; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
54 ; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
[all …]
Dfma.ll19 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
35 ; GCN: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
42 ; SI: v_fma_f32
43 ; SI: v_fma_f32
45 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
62 ; SI: v_fma_f32
63 ; SI: v_fma_f32
64 ; SI: v_fma_f32
65 ; SI: v_fma_f32
66 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
[all …]
Duse-sgpr-multiple-times.ll22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
34 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], s[[SGPR0]], [[VGPR1]]
56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SA]], [[VA0]], [[VB]]
57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SA]], [[VA1]], [[VB]]
74 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], [[VGPR1]], s[[SGPR0]]
86 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[SGPR0]], s[[SGPR0]]
96 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
106 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
128 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
140 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[SK]], [[SK]], [[VGPR]]
[all …]
Dfmad-formation-fmul-distribute-denormal-mode.ll20 ; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
44 ; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
68 ; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
69 ; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
96 ; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
97 ; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
124 ; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
149 ; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
/external/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/
Dfrem.ll25 ; CI-NEXT: v_fma_f32 v5, -v2, v4, 1.0
26 ; CI-NEXT: v_fma_f32 v4, v5, v4, v4
28 ; CI-NEXT: v_fma_f32 v6, -v2, v5, v3
29 ; CI-NEXT: v_fma_f32 v5, v6, v4, v5
30 ; CI-NEXT: v_fma_f32 v2, -v2, v5, v3
36 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0
97 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0
152 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0
201 ; CI-NEXT: v_fma_f32 v4, -v1, v3, 1.0
202 ; CI-NEXT: v_fma_f32 v3, v4, v3, v3
[all …]
Dfma.ll6 define float @v_fma_f32(float %x, float %y, float %z) {
7 ; GFX6-LABEL: v_fma_f32:
10 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
13 ; GFX8-LABEL: v_fma_f32:
16 ; GFX8-NEXT: v_fma_f32 v0, v0, v1, v2
19 ; GFX9-LABEL: v_fma_f32:
22 ; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
32 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
33 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
39 ; GFX8-NEXT: v_fma_f32 v0, v0, v2, v4
[all …]
/external/llvm/test/CodeGen/AMDGPU/
Duse-sgpr-multiple-times.ll22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
36 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
76 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
90 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
100 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
110 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
132 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
143 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
[all …]
Dfma.ll11 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
26 ; SI: v_fma_f32
27 ; SI: v_fma_f32
43 ; SI: v_fma_f32
44 ; SI: v_fma_f32
45 ; SI: v_fma_f32
46 ; SI: v_fma_f32
64 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}

123