/external/eigen/test/

half_float.cpp:
    struct half;
    using Eigen::half;
    // in test_conversion():
    VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
    VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
    VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
    VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
    VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
    VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
    VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00);  // Becomes infinity.
    VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
    [all …]
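The hex expectations above follow directly from the IEEE-754 binary16 layout: 1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits. 0.33333f lands at 0x3555 because it rounds to the nearest representable mantissa, and 65536.0f overflows to the infinity pattern 0x7c00. A minimal sketch that reconstructs a few of the values; make_half is a hypothetical helper written for illustration, not part of Eigen or the test:

    #include <cstdint>
    #include <cstdio>

    // Assemble a binary16 bit pattern from its fields (illustrative helper).
    static uint16_t make_half(unsigned sign, unsigned exp, unsigned mant) {
      return static_cast<uint16_t>((sign << 15) | (exp << 10) | mant);
    }

    int main() {
      std::printf("%#06x\n", make_half(0, 15, 0));      // 1.0                      -> 0x3c00
      std::printf("%#06x\n", make_half(0, 14, 0));      // 0.5                      -> 0x3800
      std::printf("%#06x\n", make_half(0, 30, 0x3ff));  // 65504, largest finite    -> 0x7bff
      std::printf("%#06x\n", make_half(0, 31, 0));      // +infinity                -> 0x7c00
      std::printf("%#06x\n", make_half(1, 0, 1));       // smallest negative subnormal -> 0x8001
    }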
/external/llvm-project/llvm/test/Transforms/SLPVectorizer/AMDGPU/

packed-math.ll:
    define amdgpu_kernel void @test1_as_3_3_3_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half …
    ; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
    ; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
    ; GCN-NEXT: [[TMP3:%.*]] = bitcast half addrspace(3)* [[B:%.*]] to <2 x half> addrspace(3)*
    ; GCN-NEXT: [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
    ; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
    ; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
    ; GCN-NEXT: store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2
    %i0 = load half, half addrspace(3)* %a, align 2
    %i1 = load half, half addrspace(3)* %b, align 2
    [all …]
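The CHECK lines show the SLP vectorizer packing two adjacent scalar half multiplies into a single <2 x half> fmul. At the source level the scalar input pattern corresponds roughly to the sketch below (an illustration assuming a Clang target with _Float16 support; the function name and signature are made up, and the addrspace(3) qualifiers from the IR are omitted):

    // Two adjacent fp16 multiplies over contiguous elements; on gfx9 the SLP
    // vectorizer can merge them into one packed <2 x half> operation.
    extern "C" void mul2_f16(const _Float16 *a, const _Float16 *b, _Float16 *c) {
      c[0] = a[0] * b[0];
      c[1] = a[1] * b[1];
    }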
/external/llvm-project/llvm/test/CodeGen/AArch64/

fp16-vector-load-store.ll:
    define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
      %0 = load <4 x half>, <4 x half>* %a, align 8
      ret <4 x half> %0
    define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
      %0 = load <8 x half>, <8 x half>* %a, align 16
      ret <8 x half> %0
    define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
      %0 = load half, half* %a, align 2
      %1 = insertelement <4 x half> undef, half %0, i32 0
      %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
    [all …]
fp16_intrinsic_lane.ll:
    declare half @llvm.aarch64.neon.fmulx.f16(half, half)
    declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
    declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
    declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
    declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
    declare half @llvm.fma.f16(half, half, half) #1
    define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane…
      %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
      %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
      ret <4 x half> %fmla3
    [all …]
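In @t_vfma_lane_f16, the shufflevector with an all-zero mask broadcasts lane 0 of %c, so the fma intrinsic computes b[i] * c[0] + a[i] in every lane. A scalar sketch of that semantics, written over float for clarity rather than with the ARM NEON intrinsics themselves:

    #include <cmath>

    // Element-wise meaning of the fma-with-broadcast-lane pattern above.
    void fma_lane0(float a[4], const float b[4], const float c[4]) {
      for (int i = 0; i < 4; ++i) {
        a[i] = std::fma(b[i], c[0], a[i]);  // b[i] * c[0] + a[i], single rounding
      }
    }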
fp16_intrinsic_scalar_3op.ll:
    define dso_local half @t_vfmah_f16(half %a, half %b, half %c) {
      %0 = tail call half @llvm.fma.f16(half %b, half %c, half %a)
      ret half %0
    define half @fnma16(half %a, half %b, half %c) nounwind readnone ssp {
      %0 = tail call half @llvm.fma.f16(half %a, half %b, half %c)
      %mul = fmul half %0, -1.000000e+00
      ret half %mul
    define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
      %mul = fmul half %b, -1.000000e+00
      %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
    [all …]
fp16_intrinsic_vector_2op.ll:
    declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
    declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
    declare <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half>, <4 x half>)
    declare <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half>, <8 x half>)
    declare <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half>, <4 x half>)
    declare <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half>, <8 x half>)
    declare <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half>, <4 x half>)
    declare <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half>, <8 x half>)
    declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
    declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
    [all …]
/external/llvm/test/CodeGen/AArch64/

fp16-vector-load-store.ll:
    define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
      %0 = load <4 x half>, <4 x half>* %a, align 8
      ret <4 x half> %0
    define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
      %0 = load <8 x half>, <8 x half>* %a, align 16
      ret <8 x half> %0
    define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
      %0 = load half, half* %a, align 2
      %1 = insertelement <4 x half> undef, half %0, i32 0
      %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
    [all …]
fp16-vector-shuffle.ll:
    define <4 x half> @select_64(<4 x half> %a, <4 x half> %b, <4 x i16> %c) #0 {
      %0 = bitcast <4 x half> %a to <4 x i16>
      %1 = bitcast <4 x half> %b to <4 x i16>
      %3 = bitcast <4 x i16> %vbsl5.i to <4 x half>
      ret <4 x half> %3
    define <8 x half> @select_128(<8 x half> %a, <8 x half> %b, <8 x i16> %c) #0 {
      %0 = bitcast <8 x half> %a to <8 x i16>
      %1 = bitcast <8 x half> %b to <8 x i16>
      %3 = bitcast <8 x i16> %vbsl5.i to <8 x half>
      ret <8 x half> %3
    [all …]
/external/eigen/Eigen/src/Core/arch/CUDA/

Half.h:
    struct half;
    struct half : public half_impl::half_base {
      EIGEN_DEVICE_FUNC half() {}
      EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
      EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
      explicit EIGEN_DEVICE_FUNC half(bool b)
      explicit EIGEN_DEVICE_FUNC half(const T& val)
      explicit EIGEN_DEVICE_FUNC half(float f)
      EIGEN_DEVICE_FUNC half& operator=(const half& other) {
    __device__ half operator + (const half& a, const half& b) {
    [all …]
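Eigen::half is the type these constructors and operators define: a 16-bit storage type that converts through float for arithmetic on the host and maps onto CUDA's __half on the device. A small host-side usage sketch, assuming Eigen/Core is on the include path (exact conversion behaviour differs slightly between Eigen versions):

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::half a(1.5f);                  // explicit construction from float
      Eigen::half b(0.25f);
      Eigen::half c = a + b;                // arithmetic through the half operators
      std::cout << static_cast<float>(c)    // 1.75, exactly representable in binary16
                << "\n";
      std::cout << std::hex << a.x << "\n"; // raw bits of 1.5, i.e. 3e00
    }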
/external/tensorflow/tensorflow/compiler/xla/tests/

half_test.cc:
    std::function<half(half)> compute_func;
    // in XLA_TEST_P():
    std::vector<half> x({half(1.4), half(-2.3), half(3.2), half(-4.1), half(9.0),
                         half(42.0), half(-9.0), half(-100.0)});
    auto x_data = CreateR1Parameter<half>(x, /*parameter_number=*/0, "x",
    std::function<half(half)> compute_func = GetParam().compute_func;
    std::vector<half> expected;
    ComputeAndCompareR1<half>(&builder, expected, {x_data.get()}, error_spec_);
    half sign_imp(half value) {
      return half((x < .0) ? -1 : (x > .0));
    half round_imp(half value) {
    [all …]
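The parameterized test evaluates a scalar reference function (such as sign_imp or round_imp) over every input element and compares the XLA result against it; in XLA, half aliases Eigen::half. A generic sketch of that element-wise reference step (apply_elementwise is an illustrative helper, not part of the test):

    #include <functional>
    #include <vector>
    #include <Eigen/Core>

    using half = Eigen::half;

    // Build the expected vector by running the scalar reference function over
    // each input element, mirroring the loop in the parameterized test body.
    std::vector<half> apply_elementwise(const std::vector<half>& x,
                                        const std::function<half(half)>& f) {
      std::vector<half> expected;
      expected.reserve(x.size());
      for (const half& v : x) expected.push_back(f(v));
      return expected;
    }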
/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/

space_to_depth_test_util.cc:
    // in SpaceToDepthTensorShape1x2x2x1BlockSize2Test():
    src_tensor.data = {half(1.0f), half(2.0f), half(3.0f), half(4.0f)};
    PointWiseNear({half(1.0f), half(2.0f), half(3.0f), half(4.0f)},
    // in SpaceToDepthTensorShape1x2x2x2BlockSize2Test():
    src_tensor.data = {half(1.4f), half(2.3f), half(3.2f), half(4.1f),
                       half(5.4f), half(6.3f), half(7.2f), half(8.1f)};
    PointWiseNear({half(1.4f), half(2.3f), half(3.2f), half(4.1f),
                   half(5.4f), half(6.3f), half(7.2f), half(8.1f)},
    // in SpaceToDepthTensorShape1x2x2x3BlockSize2Test():
    src_tensor.data = {half(1.0f), half(2.0f), half(3.0f), half(4.0f),
                       half(5.0f), half(6.0f), half(7.0f), half(8.0f),
                       half(9.0f), half(10.0f), half(11.0f), half(12.0f)};
    PointWiseNear({half(1.0f), half(2.0f), half(3.0f), half(4.0f),
    [all …]
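With block size 2 and an input whose spatial extent equals the block, SpaceToDepth simply folds each 2x2 spatial block into the channel dimension, which is why the expected data in these tests matches the source data order. A hypothetical NHWC reference (SpaceToDepthRef is written here for illustration and is not part of the TFLite sources):

    #include <vector>

    // Reference NHWC SpaceToDepth: output channel index is
    // (dy * block + dx) * channels + c, matching the ordering the tests expect.
    template <typename T>
    std::vector<T> SpaceToDepthRef(const std::vector<T>& in, int h, int w,
                                   int channels, int block) {
      const int oh = h / block, ow = w / block, oc = channels * block * block;
      std::vector<T> out(static_cast<size_t>(oh) * ow * oc);
      for (int y = 0; y < oh; ++y)
        for (int x = 0; x < ow; ++x)
          for (int dy = 0; dy < block; ++dy)
            for (int dx = 0; dx < block; ++dx)
              for (int c = 0; c < channels; ++c) {
                const int src = ((y * block + dy) * w + (x * block + dx)) * channels + c;
                const int dst = (y * ow + x) * oc + (dy * block + dx) * channels + c;
                out[dst] = in[src];
              }
      return out;
    }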
concat_test_util.cc:
    // in ConcatWidthTest():
    src0.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
    src1.data = {half(1.0f), half(-1.2f), half(-0.45f), half(1.045f),
                 half(1.1f), half(-1.3f), half(-0.55f), half(2.045f)};
    PointWiseNear({half(0.0f), half(-1.0f), half(1.0f), half(-1.2f),
                   half(-0.45f), half(1.045f), half(-0.05f), half(0.045f),
                   half(1.1f), half(-1.3f), half(-0.55f), half(2.045f)},
    // in ConcatHeightTest():
    src0.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)};
    src1.data = {half(1.0f), half(-1.2f)};
    RETURN_IF_ERROR(PointWiseNear({half(0.0f), half(-1.0f), half(-0.05f),
                                   half(0.045f), half(1.0f), half(-1.2f)},
    [all …]
/external/llvm-project/llvm/test/CodeGen/AMDGPU/

llvm.amdgcn.fmad.ftz.f16.ll:
    declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
        half addrspace(1)* %r,
        half addrspace(1)* %a,
        half addrspace(1)* %b,
        half addrspace(1)* %c) {
      %a.val = load half, half addrspace(1)* %a
      %b.val = load half, half addrspace(1)* %b
      %c.val = load half, half addrspace(1)* %c
      %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
      store half %r.val, half addrspace(1)* %r
    [all …]
packed-op-sel.ll:
    …kernel void @fma_vector_vector_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %…
      %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
      %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
      %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
      %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
      %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0
      …%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitia…
      …%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %sca…
      store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
    …ma_vector_vector_neg_broadcast_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %…
    [all …]
fcanonicalize.f16.ll:
    declare half @llvm.fabs.f16(half) #0
    declare half @llvm.canonicalize.f16(half) #0
    declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
    declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
    declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
    declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
    define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(half addrspace(1)* %out) #1 {
      %canonicalized = call half @llvm.canonicalize.f16(half undef)
      store half %canonicalized, half addrspace(1)* %out
    define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
    [all …]
v_mac_f16.ll:
        half addrspace(1)* %r,
        half addrspace(1)* %a,
        half addrspace(1)* %b,
        half addrspace(1)* %c) #0 {
      %a.val = load half, half addrspace(1)* %a
      %b.val = load half, half addrspace(1)* %b
      %c.val = load half, half addrspace(1)* %c
      %t.val = fmul half %a.val, %b.val
      %r.val = fadd half %t.val, %c.val
      store half %r.val, half addrspace(1)* %r
    [all …]
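The excerpt is the classic multiply-accumulate shape, r = a * b + c written as separate fmul and fadd, which the AMDGPU backend can select into a v_mac_f16 style instruction. At the source level it corresponds roughly to the sketch below (assuming a Clang target with native _Float16; whether the backend fuses it is a codegen decision this sketch does not control):

    // Separate multiply and add, the pattern v_mac_f16.ll feeds to the backend.
    extern "C" void mac_f16(_Float16 *r, const _Float16 *a,
                            const _Float16 *b, const _Float16 *c) {
      _Float16 t = *a * *b;
      *r = t + *c;
    }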
fneg-fabs.f16.ll:
    define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
      %fabs = call half @llvm.fabs.f16(half %x)
      %fsub = fsub half -0.0, %fabs
      %fadd = fadd half %y, %fsub
      store half %fadd, half addrspace(1)* %out, align 2
    define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
      %fabs = call half @llvm.fabs.f16(half %x)
      %fsub = fsub half -0.0, %fabs
      %fmul = fmul half %y, %fsub
      store half %fmul, half addrspace(1)* %out, align 2
    [all …]
pk_max_f16_literal.ll:
    define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
      %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
      %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
      …%tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0…
      store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
    define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
      %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
      %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
      …%tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0…
      store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
    [all …]
/external/llvm-project/llvm/test/CodeGen/ARM/

fp16-vminmaxnm-vector.ll:
    define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
      %tmp3 = fcmp fast ogt <4 x half> %A, %B
      %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
      ret <4 x half> %tmp4
    define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
      %tmp3 = fcmp fast ogt <4 x half> %A, %B
      %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
      ret <4 x half> %tmp4
    define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
      %tmp3 = fcmp fast oge <4 x half> %A, %B
    [all …]
fp16-load-store.ll:
    define void @load_zero(half* %in, half* %out) {
      %arrayidx = getelementptr inbounds half, half* %in, i32 0
      %load = load half, half* %arrayidx, align 2
      store half %load, half* %out
    define void @load_255(half* %in, half* %out) {
      %arrayidx = getelementptr inbounds half, half* %in, i32 255
      %load = load half, half* %arrayidx, align 2
      store half %load, half* %out
    define void @load_256(half* %in, half* %out) {
      %arrayidx = getelementptr inbounds half, half* %in, i32 256
    [all …]
fp16-fullfp16.ll:
    define void @test_fadd(half* %p, half* %q) {
      %a = load half, half* %p, align 2
      %b = load half, half* %q, align 2
      %r = fadd half %a, %b
      store half %r, half* %p
    define void @test_fsub(half* %p, half* %q) {
      %a = load half, half* %p, align 2
      %b = load half, half* %q, align 2
      %r = fsub half %a, %b
      store half %r, half* %p
    [all …]
vsel-fp16.ll:
    @varhalf = global half 0.0
    define void @test_vsel32sgt(i32 %lhs, i32 %rhs, half* %a_ptr, half* %b_ptr) {
      %a = load volatile half, half* %a_ptr
      %b = load volatile half, half* %b_ptr
      %val1 = select i1 %tst1, half %a, half %b
      store half %val1, half* @varhalf
    define void @test_vsel32sge(i32 %lhs, i32 %rhs, half* %a_ptr, half* %b_ptr) {
      %a = load volatile half, half* %a_ptr
      %b = load volatile half, half* %b_ptr
      %val1 = select i1 %tst1, half %a, half %b
    [all …]
armv8.2a-fp16-vector-intrinsics.ll:
    %struct.float16x4x2_t = type { [2 x <4 x half>] }
    %struct.float16x8x2_t = type { [2 x <8 x half>] }
    define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
      %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
      ret <4 x half> %vabs1.i
    define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
      %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
      ret <8 x half> %vabs1.i
    define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
      %0 = fcmp oeq <4 x half> %a, zeroinitializer
    [all …]
fp16-vminmaxnm.ll:
    ; TODO: we can't pass half-precision arguments as "half" types yet. We do
    ; is the shortest way to get a half type. But when we can pass half types, we
    define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
      %0 = bitcast i16 %a to half
      %1 = bitcast i16 %b to half
      %cmp = fcmp fast olt half %0, %1
      %cond = select i1 %cmp, half %0, half %1
      ret half %cond
    define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) {
      %0 = bitcast i16 %a to half
    [all …]
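The bitcast trick mentioned in the TODO (pass an i16 and bitcast it to half) exists because these tests predate direct half argument passing. The same reinterpretation can be expressed at the source level with memcpy; a sketch assuming Clang's ARM __fp16 storage type (bits_to_half is illustrative, not from the test):

    #include <cstdint>
    #include <cstring>

    // Reinterpret a raw 16-bit pattern as an fp16 value, the source-level
    // counterpart of "bitcast i16 %a to half" in the IR above.
    static __fp16 bits_to_half(uint16_t bits) {
      __fp16 h;
      std::memcpy(&h, &bits, sizeof(h));
      return h;
    }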
/external/llvm/test/CodeGen/ARM/

fp16-promote.ll:
    define void @test_fadd(half* %p, half* %q) #0 {
      %a = load half, half* %p, align 2
      %b = load half, half* %q, align 2
      %r = fadd half %a, %b
      store half %r, half* %p
    define void @test_fsub(half* %p, half* %q) #0 {
      %a = load half, half* %p, align 2
      %b = load half, half* %q, align 2
      %r = fsub half %a, %b
      store half %r, half* %p
    [all …]
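fp16-promote.ll targets ARM cores without native fp16 arithmetic, so each half operand is expected to be extended to float, operated on, and truncated back before the store. Roughly the source-level shape, as a sketch using Clang's __fp16 (a storage-only type whose arithmetic promotes to float):

    // Promotion pattern checked by the test: widen to float, add in float,
    // then truncate the result back to half.
    extern "C" void fadd_f16_promoted(__fp16 *p, const __fp16 *q) {
      float a = static_cast<float>(*p);
      float b = static_cast<float>(*q);
      *p = static_cast<__fp16>(a + b);
    }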