/external/llvm-project/llvm/test/Transforms/InstCombine/NVPTX/ |
D | nvvm-intrins.ll | 4 ; We run this test twice; once with ftz on, and again with ftz off. Behold the 7 ; RUN: cat %s > %t.ftz 8 ; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz 9 ; RUN: opt < %t.ftz -instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHEC… 15 ; We handle nvvm intrinsics with ftz variants as follows: 16 ; - If the module is in ftz mode, the ftz variant is transformed into the 17 ; regular llvm intrinsic, and the non-ftz variant is left alone. 18 ; - If the module is not in ftz mode, it's the reverse: Only the non-ftz 19 ; variant is transformed, and the ftz variant is left alone. 38 ; NOFTZ: call float @llvm.nvvm.ceil.ftz.f [all …]
|
/external/llvm-project/llvm/test/CodeGen/NVPTX/ |
D | fast-math.ll | 34 ; CHECK: sqrt.rn.ftz.f32 35 ; CHECK: div.rn.ftz.f32 43 ; CHECK: sqrt.rn.ftz.f32 44 ; CHECK: div.approx.ftz.f32 52 ; CHECK: sqrt.approx.ftz.f32 53 ; CHECK: div.approx.ftz.f32 60 ; There are no fast-math or ftz versions of sqrt and div for f64. We use 74 ; CHECK: rcp.approx.ftz.f64 107 ; CHECK: rsqrt.approx.ftz.f32 124 ; CHECK: add.rn.ftz.f32 [all …]
|
D | sqrt-approx.ll | 21 ; CHECK: rsqrt.approx.ftz.f32 37 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version. 62 ; CHECK: sqrt.rn.ftz.f32 69 ; CHECK: sqrt.approx.ftz.f32 84 ; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal, 85 ; so we just use the ftz version. 87 ; CHECK: rcp.approx.ftz.f64 101 ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version. 103 ; CHECK: rcp.approx.ftz.f64 157 ; -- refined sqrt and rsqrt with ftz enabled -- [all …]
|
D | refl1.ll | 15 declare float @llvm.nvvm.sin.approx.ftz.f(float) #1 18 declare float @llvm.nvvm.cos.approx.ftz.f(float) #1 21 declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1 27 %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a) 28 %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a) 29 %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1)
|
D | f16-instructions.ll | 34 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; 50 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; 68 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; 84 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; 99 ; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; 116 ; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; 132 ; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; 150 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]]; 151 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]]; 152 ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; [all …]
|
D | nvvm-reflect.ll | 5 ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz 9 ; RUN: cat %s > %t.ftz 10 ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz 11 ; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
|
D | math-intrins.ll | 38 ; CHECK: cvt.rpi.ftz.f32.f32 61 ; CHECK: cvt.rmi.ftz.f32.f32 113 ; CHECK: cvt.rni.ftz.f32.f32 136 ; CHECK: cvt.rni.ftz.f32.f32 159 ; CHECK: cvt.rzi.ftz.f32.f32 182 ; CHECK: abs.ftz.f32 219 ; CHECK: min.ftz.f32 256 ; CHECK: max.ftz.f32 279 ; CHECK: fma.rn.ftz.f32
|
D | inline-asm.ll | 6 ; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}} 7 %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)
|
/external/llvm/test/CodeGen/NVPTX/ |
D | refl1.ll | 15 declare float @llvm.nvvm.sin.approx.ftz.f(float) #1 18 declare float @llvm.nvvm.cos.approx.ftz.f(float) #1 21 declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1 27 %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a) 28 %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a) 29 %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1)
|
D | inline-asm.ll | 6 ; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}} 7 %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)
|
/external/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/ |
D | inst-select-amdgcn.fmad.ftz.mir | 27 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 51 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 75 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 100 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 124 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1 147 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0 170 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0 191 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0 210 # %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2 235 %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3
|
/external/llvm-project/llvm/test/CodeGen/AMDGPU/ |
D | llvm.amdgcn.fmad.ftz.ll | 6 declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c) 18 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val) 31 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val) 45 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val) 62 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0) 78 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val) 94 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val) 111 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
|
D | llvm.amdgcn.fmad.ftz.f16.ll | 5 declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c) 17 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val) 30 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val) 43 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val) 56 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0) 73 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val) 90 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val) 108 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val)
|
/external/OpenCL-CTS/test_conformance/math_brute_force/ |
D | macro_unary_double.cpp | 144 int ftz; // non-zero if running in flush to zero mode member 176 test_info.ftz = f->ftz || gForceFTZ; in TestMacro_Int_Double() 307 int ftz = job->ftz; in Test() local 433 if (ftz) in Test() 457 if (ftz) in Test()
|
D | macro_unary_float.cpp | 143 int ftz; // non-zero if running in flush to zero mode member 175 test_info.ftz = in TestMacro_Int_Float() 176 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestMacro_Int_Float() 307 int ftz = job->ftz; in Test() local 438 if (ftz) in Test() 464 if (ftz) in Test()
|
D | unary_double.cpp | 147 int ftz; // non-zero if running in flush to zero mode member 186 test_info.ftz = f->ftz || gForceFTZ; in TestFunc_Double_Double() 332 int ftz = job->ftz; in Test() local 466 if (ftz) in Test()
|
D | macro_binary_float.cpp | 150 int ftz; // non-zero if running in flush to zero mode member 288 test_info.ftz = in TestMacro_Int_Float_Float() 289 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestMacro_Int_Float_Float() 433 int ftz = job->ftz; in Test() local 607 if (ftz) in Test() 653 if (ftz) in Test()
|
D | macro_binary_double.cpp | 152 int ftz; // non-zero if running in flush to zero mode member 298 test_info.ftz = f->ftz || gForceFTZ; in TestMacro_Int_Double_Double() 442 int ftz = job->ftz; in Test() local 616 if (ftz) in Test() 663 if (ftz) in Test()
|
D | unary_u_double.cpp | 133 int ftz = f->ftz || gForceFTZ; in TestFunc_Double_ULong() local 248 if (ftz) in TestFunc_Double_ULong()
|
D | i_unary_float.cpp | 123 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Int_Float() local 244 if (ftz && IsFloatSubnormal(s[j])) in TestFunc_Int_Float()
|
D | unary_u_float.cpp | 125 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Float_UInt() local 252 if (ftz) in TestFunc_Float_UInt()
|
D | i_unary_double.cpp | 125 int ftz = f->ftz || gForceFTZ; in TestFunc_Int_Double() local 247 if (ftz && IsDoubleSubnormal(s[j])) in TestFunc_Int_Double()
|
D | unary_two_results_float.cpp | 132 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Float2_Float() local 257 if (ftz) ForceFTZ(&oldMode); in TestFunc_Float2_Float() 300 if (isFract && ftz) RestoreFPState(&oldMode); in TestFunc_Float2_Float() 384 if (ftz) in TestFunc_Float2_Float()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/ |
D | gpu_codegen_test.cc | 34 GpuCodegenTest::CreateNewVerifiedModuleWithFTZ(bool ftz) { in CreateNewVerifiedModuleWithFTZ() argument 37 debug_options.set_xla_gpu_ftz(ftz); in CreateNewVerifiedModuleWithFTZ()
|
D | gpu_ftz_test.cc | 30 explicit GpuFtzTest(bool ftz) : ftz_(ftz) {} in GpuFtzTest() argument
|