/external/arm-optimized-routines/math/ |
D | v_expf.c | 41 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 43 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 57 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 59 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 60 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 65 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 66 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 73 p = v_fma_f32 (C0, r, C1); in V_NAME() 74 q = v_fma_f32 (C2, r, C3); in V_NAME() 75 q = v_fma_f32 (p, r2, q); in V_NAME() [all …]
|
D | v_expf_1u.c | 55 z = v_fma_f32 (x, InvLn2, Shift); in V_NAME() 57 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 58 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 63 r = v_fma_f32 (n, -Ln2hi, x); in V_NAME() 64 r = v_fma_f32 (n, -Ln2lo, r); in V_NAME() 70 poly = v_fma_f32 (C0, r, C1); in V_NAME() 71 poly = v_fma_f32 (poly, r, C2); in V_NAME() 72 poly = v_fma_f32 (poly, r, C3); in V_NAME() 73 poly = v_fma_f32 (poly, r, C4); in V_NAME() 74 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME() [all …]
|
D | v_cosf.c | 51 n = v_fma_f32 (InvPi, r + HalfPi, Shift); in V_NAME() 57 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 58 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 59 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 63 y = v_fma_f32 (A9, r2, A7); in V_NAME() 64 y = v_fma_f32 (y, r2, A5); in V_NAME() 65 y = v_fma_f32 (y, r2, A3); in V_NAME() 66 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_logf.c | 59 p = v_fma_f32 (P6, r, P5); in V_NAME() 60 q = v_fma_f32 (P4, r, P3); in V_NAME() 61 y = v_fma_f32 (P2, r, P1); in V_NAME() 62 p = v_fma_f32 (P7, r2, p); in V_NAME() 63 q = v_fma_f32 (p, r2, q); in V_NAME() 64 y = v_fma_f32 (q, r2, y); in V_NAME() 65 p = v_fma_f32 (Ln2, n, r); in V_NAME() 66 y = v_fma_f32 (y, r2, p); in V_NAME()
|
D | v_sinf.c | 51 n = v_fma_f32 (InvPi, r, Shift); in V_NAME() 56 r = v_fma_f32 (-Pi1, n, r); in V_NAME() 57 r = v_fma_f32 (-Pi2, n, r); in V_NAME() 58 r = v_fma_f32 (-Pi3, n, r); in V_NAME() 62 y = v_fma_f32 (A9, r2, A7); in V_NAME() 63 y = v_fma_f32 (y, r2, A5); in V_NAME() 64 y = v_fma_f32 (y, r2, A3); in V_NAME() 65 y = v_fma_f32 (y * r2, r, r); in V_NAME()
|
D | v_exp2f.c | 38 v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); in specialcase() 40 v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); in specialcase() 68 p = v_fma_f32 (C0, r, C1); in V_NAME() 69 q = v_fma_f32 (C2, r, C3); in V_NAME() 70 q = v_fma_f32 (p, r2, q); in V_NAME() 72 poly = v_fma_f32 (q, r2, p); in V_NAME() 75 return v_fma_f32 (poly, scale, scale); in V_NAME()
|
D | v_exp2f_1u.c | 65 poly = v_fma_f32 (C0, r, C1); in V_NAME() 66 poly = v_fma_f32 (poly, r, C2); in V_NAME() 67 poly = v_fma_f32 (poly, r, C3); in V_NAME() 68 poly = v_fma_f32 (poly, r, C4); in V_NAME() 69 poly = v_fma_f32 (poly, r, C5); in V_NAME() 70 poly = v_fma_f32 (poly, r, v_f32 (1.0f)); in V_NAME()
|
D | v_math.h | 180 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) in v_fma_f32() function 449 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) in v_fma_f32() function
|
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/AMDGPU/ |
D | use-sgpr-multiple-times.ll | 22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] 34 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], s[[SGPR0]], [[VGPR1]] 56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SA]], [[VA0]], [[VB]] 57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SA]], [[VA1]], [[VB]] 74 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[SGPR0]], [[VGPR1]], s[[SGPR0]] 86 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[SGPR0]], s[[SGPR0]] 96 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 106 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]] 128 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]] 139 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]] [all …]
|
D | fma.ll | 19 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 35 ; GCN: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 42 ; SI: v_fma_f32 43 ; SI: v_fma_f32 45 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 62 ; SI: v_fma_f32 63 ; SI: v_fma_f32 64 ; SI: v_fma_f32 65 ; SI: v_fma_f32 66 ; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} [all …]
|
D | llvm.fma.f16.ll | 14 ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] 39 ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] 62 ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] 85 ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] 119 ; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]], v[[C_F32_0]] 121 ; SI-DAG: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]], v[[C_F32_1]] 167 ; SI: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32]], v[[C_F32_1]] 168 ; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], v[[A_F32]], v[[C_F32_0]] 208 ; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32]], v[[C_F32_0]] 210 ; SI-DAG: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32]], v[[C_F32_1]] [all …]
|
D | fdiv.ll | 21 ; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 22 ; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] 24 ; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] 25 ; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] 26 ; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] 46 ; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 47 ; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] 49 ; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] 50 ; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] 51 ; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] [all …]
|
D | mad-combine.ll | 24 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 61 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] 62 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] 103 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 134 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] 167 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] 168 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] 206 ; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] 239 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] 240 ; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] [all …]
|
D | fmuladd.f32.ll | 35 ; GCN-DENORM-FASTFMA: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+}} 52 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 79 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] 110 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] 140 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] 176 ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] 209 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]] 241 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] 274 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]] 301 ; GCN-FLUSH-FMAC: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]] [all …]
|
D | fadd-fma-fmul-combine.ll | 23 ; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], [[Z]] 24 ; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[FMA0]] 55 ; GCN-FASTFMA: v_fma_f32 [[FMA0:v[0-9]+]], [[U]], [[V]], -[[Z]] 56 ; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[FMA0]] 83 ; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]] 116 ; GCN-FASTFMA: v_fma_f32 [[FMA1:v[0-9]+]], [[X]], [[Y]], [[MUL]] 199 ; GCN-FASTFMA: v_fma_f32 [[MAD:v[0-9]+]], [[X]], [[Y]], [[MUL]] 236 ; GCN-FASTFMA-NEXT: v_fma_f32 [[FMA:v[0-9]+]], [[X]], [[Y]], [[U]]
|
D | fma-combine.ll | 394 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]] 410 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]] 426 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] 442 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] 458 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]] 474 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]] 490 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] 506 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] 522 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] 538 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]] [all …]
|
D | fpext-free.ll | 81 ; GFX9-F32DENORM-NEXT: v_fma_f32 104 ; GFX9-F32DENORM-NEXT: v_fma_f32 125 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 143 ; GFX89: v_fma_f32 158 ; GFX89: v_fma_f32 307 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3 348 ; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
|
D | fdiv.f16.ll | 14 ; SI: v_fma_f32 15 ; SI: v_fma_f32 17 ; SI: v_fma_f32 18 ; SI: v_fma_f32 19 ; SI: v_fma_f32
|
D | fneg-combines.ll | 691 ; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 694 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]] 716 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 742 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 746 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] 774 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]] 777 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 801 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]] 804 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 828 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]] [all …]
|
D | frem.ll | 11 ; GCN: v_fma_f32
|
/external/llvm/test/CodeGen/AMDGPU/ |
D | use-sgpr-multiple-times.ll | 22 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]] 36 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]] 56 ; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]] 57 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]] 76 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] 90 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] 100 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0 110 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]] 132 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]] 143 ; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]] [all …]
|
D | fma.ll | 11 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} 26 ; SI: v_fma_f32 27 ; SI: v_fma_f32 43 ; SI: v_fma_f32 44 ; SI: v_fma_f32 45 ; SI: v_fma_f32 46 ; SI: v_fma_f32 64 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
|
D | mad-combine.ll | 24 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 61 ; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]] 62 ; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]] 103 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]] 134 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]] 167 ; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]] 168 ; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]] 206 ; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]] 238 ; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]] 239 ; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]] [all …]
|
D | fdiv.ll | 26 ; I754-DAG: v_fma_f32 28 ; I754-DAG: v_fma_f32
|
D | frem.ll | 11 ; GCN: v_fma_f32
|