/external/arm-optimized-routines/math/ |
D | v_expf.c | 41 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 43 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 57 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 59 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 60 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 65 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 66 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 73 p = v_fma_f32 (C0, r, C1); in V_NAME() 74 q = v_fma_f32 (C2, r, C3); in V_NAME() 75 q = v_fma_f32 (p, r2, q); in V_NAME() [all …]
|
D | v_expf_1u.c | 55 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 57 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 58 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 63 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 64 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 70 poly = v_fma_f32 (C0, r, C1); in V_NAME() 71 poly = v_fma_f32 (poly, r, C2); in V_NAME() 72 poly = v_fma_f32 (poly, r, C3); in V_NAME() 73 poly = v_fma_f32 (poly, r, C4); in V_NAME() 74 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME() [all …]
|
D | v_cosf.c | 51 n = v_fma_f32 (InvPi, r + HalfPi, Shift); in V_NAME() 57 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 58 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 59 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 63 y = v_fma_f32 (A9, r2, A7); in V_NAME() 64 y = v_fma_f32 (y, r2, A5); in V_NAME() 65 y = v_fma_f32 (y, r2, A3); in V_NAME() 66 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_logf.c | 59 p = v_fma_f32 (P6, r, P5); in V_NAME() 60 q = v_fma_f32 (P4, r, P3); in V_NAME() 61 y = v_fma_f32 (P2, r, P1); in V_NAME() 62 p = v_fma_f32 (P7, r2, p); in V_NAME() 63 q = v_fma_f32 (p, r2, q); in V_NAME() 64 y = v_fma_f32 (q, r2, y); in V_NAME() 65 p = v_fma_f32 (Ln2, n, r); in V_NAME() 66 y = v_fma_f32 (y, r2, p); in V_NAME()
|
D | v_sinf.c | 51 n = v_fma_f32 (InvPi, r, Shift); in V_NAME() 56 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 57 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 58 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 62 y = v_fma_f32 (A9, r2, A7); in V_NAME() 63 y = v_fma_f32 (y, r2, A5); in V_NAME() 64 y = v_fma_f32 (y, r2, A3); in V_NAME() 65 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_exp2f.c | 38 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 40 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 68 p = v_fma_f32 (C0, r, C1); in V_NAME() 69 q = v_fma_f32 (C2, r, C3); in V_NAME() 70 q = v_fma_f32 (p, r2, q); in V_NAME() 72 poly = v_fma_f32 (q, r2, p); in V_NAME() 75 return v_fma_f32 (poly, scale, scale); in V_NAME()
|
D | v_exp2f_1u.c | 65 poly = v_fma_f32 (C0, r, C1); in V_NAME() 66 poly = v_fma_f32 (poly, r, C2); in V_NAME() 67 poly = v_fma_f32 (poly, r, C3); in V_NAME() 68 poly = v_fma_f32 (poly, r, C4); in V_NAME() 69 poly = v_fma_f32 (poly, r, C5); in V_NAME() 70 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
|
/external/llvm-project/libc/AOR_v20.02/math/ |
D | v_expf.c | 42 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 44 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 58 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 60 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 61 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 66 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 67 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 74 p = v_fma_f32 (C0, r, C1); in V_NAME() 75 q = v_fma_f32 (C2, r, C3); in V_NAME() 76 q = v_fma_f32 (p, r2, q); in V_NAME() [all …]
|
D | v_expf_1u.c | 56 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 58 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 59 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 64 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 65 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 71 poly = v_fma_f32 (C0, r, C1); in V_NAME() 72 poly = v_fma_f32 (poly, r, C2); in V_NAME() 73 poly = v_fma_f32 (poly, r, C3); in V_NAME() 74 poly = v_fma_f32 (poly, r, C4); in V_NAME() 75 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME() [all …]
|
D | v_cosf.c | 52 n = v_fma_f32 (InvPi, r + HalfPi, Shift); in V_NAME() 58 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 59 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 60 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 64 y = v_fma_f32 (A9, r2, A7); in V_NAME() 65 y = v_fma_f32 (y, r2, A5); in V_NAME() 66 y = v_fma_f32 (y, r2, A3); in V_NAME() 67 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_sinf.c | 52 n = v_fma_f32 (InvPi, r, Shift); in V_NAME() 57 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 58 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 59 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 63 y = v_fma_f32 (A9, r2, A7); in V_NAME() 64 y = v_fma_f32 (y, r2, A5); in V_NAME() 65 y = v_fma_f32 (y, r2, A3); in V_NAME() 66 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_logf.c | 60 p = v_fma_f32 (P6, r, P5); in V_NAME() 61 q = v_fma_f32 (P4, r, P3); in V_NAME() 62 y = v_fma_f32 (P2, r, P1); in V_NAME() 63 p = v_fma_f32 (P7, r2, p); in V_NAME() 64 q = v_fma_f32 (p, r2, q); in V_NAME() 65 y = v_fma_f32 (q, r2, y); in V_NAME() 66 p = v_fma_f32 (Ln2, n, r); in V_NAME() 67 y = v_fma_f32 (y, r2, p); in V_NAME()
|
D | v_exp2f.c | 39 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 41 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 69 p = v_fma_f32 (C0, r, C1); in V_NAME() 70 q = v_fma_f32 (C2, r, C3); in V_NAME() 71 q = v_fma_f32 (p, r2, q); in V_NAME() 73 poly = v_fma_f32 (q, r2, p); in V_NAME() 76 return v_fma_f32 (poly, scale, scale); in V_NAME()
|
D | v_exp2f_1u.c | 66 poly = v_fma_f32 (C0, r, C1); in V_NAME() 67 poly = v_fma_f32 (poly, r, C2); in V_NAME() 68 poly = v_fma_f32 (poly, r, C3); in V_NAME() 69 poly = v_fma_f32 (poly, r, C4); in V_NAME() 70 poly = v_fma_f32 (poly, r, C5); in V_NAME() 71 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
|
/external/llvm-project/llvm/test/CodeGen/AMDGPU/ |
D | llvm.powi.ll | 60 ; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 61 ; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 64 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 65 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 66 ; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 77 ; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 78 ; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 80 ; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 81 ; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 82 ; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 [all …]
|
D | frem.ll | 32 ; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 33 ; SI-NEXT: v_fma_f32 v4, v5, v4, v4 35 ; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 36 ; SI-NEXT: v_fma_f32 v5, v6, v4, v5 37 ; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 42 ; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 73 ; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 74 ; CI-NEXT: v_fma_f32 v4, v5, v4, v4 76 ; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 77 ; CI-NEXT: v_fma_f32 v5, v6, v4, v5 [all …]
|
D | strict_fma.f32.ll | 8 ; GCN-NEXT: v_fma_f32 v0, v0, v1, v2 18 ; GCN-NEXT: v_fma_f32 v0, v0, v2, v4 19 ; GCN-NEXT: v_fma_f32 v1, v1, v3, v5 29 ; GCN-NEXT: v_fma_f32 v0, v0, v3, v6 30 ; GCN-NEXT: v_fma_f32 v1, v1, v4, v7 31 ; GCN-NEXT: v_fma_f32 v2, v2, v5, v8 41 ; GCN-NEXT: v_fma_f32 v0, v0, v4, v8 42 ; GCN-NEXT: v_fma_f32 v1, v1, v5, v9 43 ; GCN-NEXT: v_fma_f32 v2, v2, v6, v10 44 ; GCN-NEXT: v_fma_f32 v3, v3, v7, v11 [all …]
|
D | fdiv.ll | 23 ; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 24 ; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] 26 ; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] 27 ; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] 28 ; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] 49 ; PREGFX10: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 50 ; PREGFX10: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] 52 ; PREGFX10: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] 53 ; PREGFX10: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] 54 ; PREGFX10: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] [all …]
|
D | fma.ll | 19 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 35 ; GCN: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 42 ; SI: v_fma_f32 43 ; SI: v_fma_f32 45 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 62 ; SI: v_fma_f32 63 ; SI: v_fma_f32 64 ; SI: v_fma_f32 65 ; SI: v_fma_f32 66 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} [all …]
|
D | use-sgpr-multiple-times.ll | 22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] 34 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], s[[SGPR0]], [[VGPR1]] 56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SA]], [[VA0]], [[VB]] 57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SA]], [[VA1]], [[VB]] 74 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], [[VGPR1]], s[[SGPR0]] 86 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[SGPR0]], s[[SGPR0]] 96 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 106 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]] 128 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]] 140 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[SK]], [[SK]], [[VGPR]] [all …]
|
D | fmad-formation-fmul-distribute-denormal-mode.ll | 20 ; FMA-NEXT: v_fma_f32 v0, v1, v0, v0 44 ; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0 68 ; FMA-NEXT: v_fma_f32 v0, v2, v0, v0 69 ; FMA-NEXT: v_fma_f32 v1, v3, v1, v1 96 ; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0 97 ; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1 124 ; FMA-NEXT: v_fma_f32 v0, v0, v1, v1 149 ; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
|
/external/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/ |
D | frem.ll | 25 ; CI-NEXT: v_fma_f32 v5, -v2, v4, 1.0 26 ; CI-NEXT: v_fma_f32 v4, v5, v4, v4 28 ; CI-NEXT: v_fma_f32 v6, -v2, v5, v3 29 ; CI-NEXT: v_fma_f32 v5, v6, v4, v5 30 ; CI-NEXT: v_fma_f32 v2, -v2, v5, v3 36 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 97 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 152 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 201 ; CI-NEXT: v_fma_f32 v4, -v1, v3, 1.0 202 ; CI-NEXT: v_fma_f32 v3, v4, v3, v3 [all …]
|
D | fma.ll | 6 define float @v_fma_f32(float %x, float %y, float %z) { 7 ; GFX6-LABEL: v_fma_f32: 10 ; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2 13 ; GFX8-LABEL: v_fma_f32: 16 ; GFX8-NEXT: v_fma_f32 v0, v0, v1, v2 19 ; GFX9-LABEL: v_fma_f32: 22 ; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 32 ; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4 33 ; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5 39 ; GFX8-NEXT: v_fma_f32 v0, v0, v2, v4 [all …]
|
/external/llvm/test/CodeGen/AMDGPU/ |
D | use-sgpr-multiple-times.ll | 22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] 36 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]] 56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]] 57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]] 76 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] 90 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] 100 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 110 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]] 132 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]] 143 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]] [all …]
|
D | fma.ll | 11 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 26 ; SI: v_fma_f32 27 ; SI: v_fma_f32 43 ; SI: v_fma_f32 44 ; SI: v_fma_f32 45 ; SI: v_fma_f32 46 ; SI: v_fma_f32 64 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
|