Searched refs:ftz (Results 1 – 25 of 83) sorted by relevance


/external/llvm-project/llvm/test/Transforms/InstCombine/NVPTX/
nvvm-intrins.ll
4 ; We run this test twice; once with ftz on, and again with ftz off. Behold the
7 ; RUN: cat %s > %t.ftz
8 ; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz
9 ; RUN: opt < %t.ftz -instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefix=CHEC…
15 ; We handle nvvm intrinsics with ftz variants as follows:
16 ; - If the module is in ftz mode, the ftz variant is transformed into the
17 ; regular llvm intrinsic, and the non-ftz variant is left alone.
18 ; - If the module is not in ftz mode, it's the reverse: Only the non-ftz
19 ; variant is transformed, and the ftz variant is left alone.
38 ; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
[all …]
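The handling described in the comment above can be sketched in IR. A minimal sketch (hypothetical function name; ftz mode requested via the same "denormal-fp-math-f32" attribute the RUN lines append): once the calling function flushes f32 denormals, InstCombine rewrites the ftz variant to the generic intrinsic and leaves the non-ftz variant alone.

define float @ceil_in_ftz_module(float %x) #0 {
  ; before InstCombine: %r = call float @llvm.nvvm.ceil.ftz.f(float %x)
  ; after InstCombine the ftz variant becomes the generic intrinsic,
  ; while a call to the non-ftz @llvm.nvvm.ceil.f would be left untouched
  %r = call float @llvm.ceil.f32(float %x)
  ret float %r
}

declare float @llvm.nvvm.ceil.ftz.f(float)
declare float @llvm.ceil.f32(float)

attributes #0 = { "denormal-fp-math-f32"="preserve-sign" }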
/external/llvm-project/llvm/test/CodeGen/NVPTX/
fast-math.ll
34 ; CHECK: sqrt.rn.ftz.f32
35 ; CHECK: div.rn.ftz.f32
43 ; CHECK: sqrt.rn.ftz.f32
44 ; CHECK: div.approx.ftz.f32
52 ; CHECK: sqrt.approx.ftz.f32
53 ; CHECK: div.approx.ftz.f32
60 ; There are no fast-math or ftz versions of sqrt and div for f64. We use
74 ; CHECK: rcp.approx.ftz.f64
107 ; CHECK: rsqrt.approx.ftz.f32
124 ; CHECK: add.rn.ftz.f32
[all …]
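The .ftz.f32 forms checked above are selected by the calling function's f32 denormal mode, not by the fast-math flags themselves. A minimal sketch (hypothetical function name, assuming ftz is requested the same way the InstCombine test above does; whether the divide comes out as div.rn, div.full, or div.approx depends separately on the fast-math/precision options the test exercises):

define float @fdiv_flushed(float %a, float %b) #0 {
  ; with f32 denormals flushed, NVPTX emits an .ftz.f32 divide here,
  ; e.g. div.rn.ftz.f32 or div.approx.ftz.f32
  %q = fdiv fast float %a, %b
  ret float %q
}

attributes #0 = { "denormal-fp-math-f32"="preserve-sign" }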
sqrt-approx.ll
21 ; CHECK: rsqrt.approx.ftz.f32
37 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
62 ; CHECK: sqrt.rn.ftz.f32
69 ; CHECK: sqrt.approx.ftz.f32
84 ; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal,
85 ; so we just use the ftz version.
87 ; CHECK: rcp.approx.ftz.f64
101 ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
103 ; CHECK: rcp.approx.ftz.f64
157 ; -- refined sqrt and rsqrt with ftz enabled --
[all …]
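The f64 comments above describe a fallback: there is no sqrt.approx.ftz.f64 or rsqrt.approx.ftz.f64, so an approximate f64 square root is assembled as rcp.approx.ftz.f64(rsqrt.approx.f64(x)). A hedged sketch of IR that asks for that lowering (hypothetical function name; the exact flags and attributes the test uses may differ):

define double @sqrt_approx_f64(double %x) {
  ; expected lowering, per the comments above:
  ;   rsqrt.approx.f64   %t, %x
  ;   rcp.approx.ftz.f64 %r, %t
  %r = call afn double @llvm.sqrt.f64(double %x)
  ret double %r
}

declare double @llvm.sqrt.f64(double)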
refl1.ll
15 declare float @llvm.nvvm.sin.approx.ftz.f(float) #1
18 declare float @llvm.nvvm.cos.approx.ftz.f(float) #1
21 declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1
27 %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a)
28 %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a)
29 %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1)
f16-instructions.ll
34 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
50 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
68 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]];
84 ; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]];
99 ; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
116 ; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]];
132 ; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]];
150 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]];
151 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]];
152 ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]];
[all …]
nvvm-reflect.ll
5 ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
9 ; RUN: cat %s > %t.ftz
10 ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11 ; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
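The RUN lines above show how a module selects the ftz path for the NVVMReflect pass: a module flag named "nvvm-reflect-ftz" answers the pass's ftz reflect queries. Inside a module the metadata looks like this (i32 4 is the Override flag behavior; i32 1 selects ftz, i32 0 the non-ftz path):

!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}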
math-intrins.ll
38 ; CHECK: cvt.rpi.ftz.f32.f32
61 ; CHECK: cvt.rmi.ftz.f32.f32
113 ; CHECK: cvt.rni.ftz.f32.f32
136 ; CHECK: cvt.rni.ftz.f32.f32
159 ; CHECK: cvt.rzi.ftz.f32.f32
182 ; CHECK: abs.ftz.f32
219 ; CHECK: min.ftz.f32
256 ; CHECK: max.ftz.f32
279 ; CHECK: fma.rn.ftz.f32
inline-asm.ll
6 ; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}}
7 %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)
/external/llvm/test/CodeGen/NVPTX/
refl1.ll
15 declare float @llvm.nvvm.sin.approx.ftz.f(float) #1
18 declare float @llvm.nvvm.cos.approx.ftz.f(float) #1
21 declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1
27 %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a)
28 %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a)
29 %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1)
inline-asm.ll
6 ; CHECK: ex2.approx.ftz.f32 %f{{[0-9]+}}, %f{{[0-9]+}}
7 %0 = call float asm "ex2.approx.ftz.f32 $0, $1;", "=f,f"(float %x)
/external/llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/
inst-select-amdgcn.fmad.ftz.mir
27 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
51 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
75 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
100 %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
124 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %1
147 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %0
170 %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %1, %0, %0
191 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0
210 # %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %2
235 %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %1, %3
/external/llvm-project/llvm/test/CodeGen/AMDGPU/
llvm.amdgcn.fmad.ftz.ll
6 declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
18 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val)
31 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val)
45 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val)
62 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0)
78 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
94 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val)
111 %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
llvm.amdgcn.fmad.ftz.f16.ll
5 declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
17 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
30 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val)
43 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val)
56 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0)
73 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val)
90 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val)
108 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val)
/external/OpenCL-CTS/test_conformance/math_brute_force/
macro_unary_double.cpp
144 int ftz; // non-zero if running in flush to zero mode member
176 test_info.ftz = f->ftz || gForceFTZ; in TestMacro_Int_Double()
307 int ftz = job->ftz; in Test() local
433 if (ftz) in Test()
457 if (ftz) in Test()
macro_unary_float.cpp
143 int ftz; // non-zero if running in flush to zero mode member
175 test_info.ftz = in TestMacro_Int_Float()
176 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestMacro_Int_Float()
307 int ftz = job->ftz; in Test() local
438 if (ftz) in Test()
464 if (ftz) in Test()
unary_double.cpp
147 int ftz; // non-zero if running in flush to zero mode member
186 test_info.ftz = f->ftz || gForceFTZ; in TestFunc_Double_Double()
332 int ftz = job->ftz; in Test() local
466 if (ftz) in Test()
macro_binary_float.cpp
150 int ftz; // non-zero if running in flush to zero mode member
288 test_info.ftz = in TestMacro_Int_Float_Float()
289 f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestMacro_Int_Float_Float()
433 int ftz = job->ftz; in Test() local
607 if (ftz) in Test()
653 if (ftz) in Test()
macro_binary_double.cpp
152 int ftz; // non-zero if running in flush to zero mode member
298 test_info.ftz = f->ftz || gForceFTZ; in TestMacro_Int_Double_Double()
442 int ftz = job->ftz; in Test() local
616 if (ftz) in Test()
663 if (ftz) in Test()
unary_u_double.cpp
133 int ftz = f->ftz || gForceFTZ; in TestFunc_Double_ULong() local
248 if (ftz) in TestFunc_Double_ULong()
i_unary_float.cpp
123 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Int_Float() local
244 if (ftz && IsFloatSubnormal(s[j])) in TestFunc_Int_Float()
unary_u_float.cpp
125 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Float_UInt() local
252 if (ftz) in TestFunc_Float_UInt()
i_unary_double.cpp
125 int ftz = f->ftz || gForceFTZ; in TestFunc_Int_Double() local
247 if (ftz && IsDoubleSubnormal(s[j])) in TestFunc_Int_Double()
unary_two_results_float.cpp
132 int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); in TestFunc_Float2_Float() local
257 if (ftz) ForceFTZ(&oldMode); in TestFunc_Float2_Float()
300 if (isFract && ftz) RestoreFPState(&oldMode); in TestFunc_Float2_Float()
384 if (ftz) in TestFunc_Float2_Float()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/
gpu_codegen_test.cc
34 GpuCodegenTest::CreateNewVerifiedModuleWithFTZ(bool ftz) { in CreateNewVerifiedModuleWithFTZ() argument
37 debug_options.set_xla_gpu_ftz(ftz); in CreateNewVerifiedModuleWithFTZ()
gpu_ftz_test.cc
30 explicit GpuFtzTest(bool ftz) : ftz_(ftz) {} in GpuFtzTest() argument
