
Searched refs: half (Results 1 – 25 of 2870), sorted by relevance


/external/eigen/test/
half_float.cpp
16 struct half;
19 using Eigen::half;
26 VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); in test_conversion()
27 VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); in test_conversion()
28 VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); in test_conversion()
29 VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); in test_conversion()
30 VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); in test_conversion()
31 VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); in test_conversion()
32 VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. in test_conversion()
35 VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); in test_conversion()
[all …]
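
The expected constants above follow directly from the IEEE 754 binary16 layout: 1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits. Below is a minimal sketch of the float-to-half encoding (not Eigen's implementation) using round-to-nearest-even, covering the same special cases the test checks:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Encode a float as IEEE 754 binary16 bits: sign | exponent(5, bias 15) |
    // mantissa(10). Reproduces the values above, e.g. 1.0f -> 0x3c00,
    // 65536.0f -> 0x7c00 (infinity), -5.96046e-08f -> 0x8001 (subnormal).
    static uint16_t float_to_half_bits(float f) {
      uint32_t x;
      std::memcpy(&x, &f, sizeof(x));  // bit pattern of the float
      uint16_t sign = (x >> 16) & 0x8000u;
      uint32_t mant = x & 0x7FFFFFu;
      int32_t exp = static_cast<int32_t>((x >> 23) & 0xFF) - 127 + 15;  // rebias
      if (((x >> 23) & 0xFF) == 0xFF)                 // NaN or infinity
        return sign | 0x7C00u | (mant ? 0x200u : 0u);
      if (exp >= 31) return sign | 0x7C00u;           // overflow -> infinity
      if (exp <= 0) {                                 // subnormal half, or zero
        if (exp < -10) return sign;                   // underflows to signed zero
        mant |= 0x800000u;                            // restore implicit leading 1
        uint32_t shift = 14 - exp;
        uint16_t sub = static_cast<uint16_t>(mant >> shift);
        uint32_t rem = mant & ((1u << shift) - 1), mid = 1u << (shift - 1);
        if (rem > mid || (rem == mid && (sub & 1u))) ++sub;  // nearest-even
        return sign | sub;
      }
      uint16_t h = static_cast<uint16_t>(sign | (exp << 10) | (mant >> 13));
      uint32_t rem = mant & 0x1FFFu;                  // the 13 dropped bits
      if (rem > 0x1000u || (rem == 0x1000u && (h & 1u))) ++h;  // nearest-even
      return h;
    }

    int main() {
      std::printf("%04x %04x %04x\n", float_to_half_bits(1.0f),
                  float_to_half_bits(0.5f), float_to_half_bits(0.33333f));
    }
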
/external/llvm-project/llvm/test/Transforms/SLPVectorizer/AMDGPU/
packed-math.ll
8 define amdgpu_kernel void @test1_as_3_3_3_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half
10 ; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)*
11 ; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2
12 ; GCN-NEXT: [[TMP3:%.*]] = bitcast half addrspace(3)* [[B:%.*]] to <2 x half> addrspace(3)*
13 ; GCN-NEXT: [[TMP4:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP3]], align 2
14 ; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]]
15 ; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)*
16 ; GCN-NEXT: store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2
19 %i0 = load half, half addrspace(3)* %a, align 2
20 %i1 = load half, half addrspace(3)* %b, align 2
[all …]
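
The CHECK lines above show the SLP vectorizer rewriting adjacent scalar half loads, an fmul, and a store into single <2 x half> operations, which the AMDGPU backend can then emit as packed (v_pk_*) math. A hedged C++ sketch of the scalar pattern being vectorized (illustrative names, assuming a target where the __fp16 extension is available):

    // Two adjacent half multiplies; SLP can merge the pair into one
    // <2 x half> fmul, as the IR above checks.
    void mul2(const __fp16 *a, const __fp16 *b, __fp16 *c) {
      c[0] = a[0] * b[0];  // lane 0
      c[1] = a[1] * b[1];  // lane 1
    }
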
/external/llvm-project/llvm/test/CodeGen/AArch64/
fp16-vector-load-store.ll
4 define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
8 %0 = load <4 x half>, <4 x half>* %a, align 8
9 ret <4 x half> %0
13 define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
17 %0 = load <8 x half>, <8 x half>* %a, align 16
18 ret <8 x half> %0
22 define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
26 %0 = load half, half* %a, align 2
27 %1 = insertelement <4 x half> undef, half %0, i32 0
28 %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
[all …]
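
In load_dup_64 above, the insertelement into lane 0 followed by a shufflevector with an all-zero mask is LLVM's canonical splat idiom; AArch64 lowers the load-plus-splat to a single LD1R. A hedged scalar equivalent (again using the __fp16 extension):

    // Load one half and broadcast it to all four lanes -- what the
    // insertelement + shufflevector pair above expresses.
    void splat4(const __fp16 *a, __fp16 out[4]) {
      __fp16 v = *a;                            // single scalar load
      for (int i = 0; i < 4; ++i) out[i] = v;   // replicate into every lane
    }
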
fp16_intrinsic_lane.ll
4 declare half @llvm.aarch64.neon.fmulx.f16(half, half)
5 declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
6 declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
7 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
8 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
9 declare half @llvm.fma.f16(half, half, half) #1
11 define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane…
19 %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
20 %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
21 ret <4 x half> %fmla3
[all …]
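
t_vfma_lane_f16 splats one lane of %c (the zero-mask shufflevector again) and feeds the splat to llvm.fma, which is how by-element FMLA forms are expressed. A hedged sketch, computed in float since std::fma has no half overload:

    #include <cmath>

    // Per-lane fused multiply-add against a broadcast lane of c:
    // out[i] = fma(b[i], c[0], a[i]), matching %fmla3 above.
    void fma_lane0(const __fp16 a[4], const __fp16 b[4], const __fp16 c[4],
                   __fp16 out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = static_cast<__fp16>(
            std::fma(float(b[i]), float(c[0]), float(a[i])));
    }
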
fp16_intrinsic_scalar_3op.ll
3 define dso_local half @t_vfmah_f16(half %a, half %b, half %c) {
8 %0 = tail call half @llvm.fma.f16(half %b, half %c, half %a)
9 ret half %0
12 define half @fnma16(half %a, half %b, half %c) nounwind readnone ssp {
16 %0 = tail call half @llvm.fma.f16(half %a, half %b, half %c)
17 %mul = fmul half %0, -1.000000e+00
18 ret half %mul
21 define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
25 %mul = fmul half %b, -1.000000e+00
26 %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
[all …]
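
fnma16 and fms16 above exercise two algebraic forms the backend folds into negating FMA variants: fnma(a,b,c) = -(a*b + c) (the fmul by -1.0 after the fma) and fms(a,b,c) = a*(-b) + c (the fmul by -1.0 before it). The same identities in float, for portability:

    #include <cmath>

    // The two shapes the tests above check, written with float std::fma:
    float fnma(float a, float b, float c) { return -std::fma(a, b, c); }  // -(a*b+c)
    float fms(float a, float b, float c)  { return std::fma(a, -b, c); }  // a*(-b)+c
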
fp16_intrinsic_vector_2op.ll
3 declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
4 declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
5 declare <4 x half> @llvm.aarch64.neon.fminnmp.v4f16(<4 x half>, <4 x half>)
6 declare <8 x half> @llvm.aarch64.neon.fminnmp.v8f16(<8 x half>, <8 x half>)
7 declare <4 x half> @llvm.aarch64.neon.fmaxnmp.v4f16(<4 x half>, <4 x half>)
8 declare <8 x half> @llvm.aarch64.neon.fmaxnmp.v8f16(<8 x half>, <8 x half>)
9 declare <4 x half> @llvm.aarch64.neon.fabd.v4f16(<4 x half>, <4 x half>)
10 declare <8 x half> @llvm.aarch64.neon.fabd.v8f16(<8 x half>, <8 x half>)
11 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
12 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
[all …]
/external/llvm/test/CodeGen/AArch64/
fp16-vector-load-store.ll
4 define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
8 %0 = load <4 x half>, <4 x half>* %a, align 8
9 ret <4 x half> %0
13 define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
17 %0 = load <8 x half>, <8 x half>* %a, align 16
18 ret <8 x half> %0
22 define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
26 %0 = load half, half* %a, align 2
27 %1 = insertelement <4 x half> undef, half %0, i32 0
28 %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
[all …]
fp16-vector-shuffle.ll
4 define <4 x half> @select_64(<4 x half> %a, <4 x half> %b, <4 x i16> %c) #0 {
8 %0 = bitcast <4 x half> %a to <4 x i16>
9 %1 = bitcast <4 x half> %b to <4 x i16>
14 %3 = bitcast <4 x i16> %vbsl5.i to <4 x half>
15 ret <4 x half> %3
19 define <8 x half> @select_128(<8 x half> %a, <8 x half> %b, <8 x i16> %c) #0 {
23 %0 = bitcast <8 x half> %a to <8 x i16>
24 %1 = bitcast <8 x half> %b to <8 x i16>
29 %3 = bitcast <8 x i16> %vbsl5.i to <8 x half>
30 ret <8 x half> %3
[all …]
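
select_64/select_128 bitcast the half vectors to integer vectors because the select is implemented as a bitwise BSL, which is type-agnostic: each result bit comes from %a where the mask bit is set and from %b where it is clear. A hedged one-lane sketch:

    #include <cstdint>

    // Bitwise select on one 16-bit lane: (a & mask) | (b & ~mask). This is
    // the vbsl5.i pattern above, done on the half's raw bits -- hence the
    // bitcasts to <4 x i16> and back.
    uint16_t bsl16(uint16_t mask, uint16_t a, uint16_t b) {
      return (a & mask) | (b & static_cast<uint16_t>(~mask));
    }
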
/external/eigen/Eigen/src/Core/arch/CUDA/
Half.h
49 struct half;
77 struct half : public half_impl::half_base { struct
82 EIGEN_DEVICE_FUNC half() {} in half() function
84 EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} in half() function
85 EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {} in half() function
87 explicit EIGEN_DEVICE_FUNC half(bool b) in half() argument
90 explicit EIGEN_DEVICE_FUNC half(const T& val) in half() argument
92 explicit EIGEN_DEVICE_FUNC half(float f) in half() function
136 EIGEN_DEVICE_FUNC half& operator=(const half& other) { argument
151 __device__ half operator + (const half& a, const half& b) {
[all …]
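
The excerpt shows Eigen::half's converting constructors (bool, float, generic T) and a device-side operator+; on the host the same API is available with a float fallback. A minimal usage sketch, assuming an Eigen where half is reachable through <Eigen/Core>:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      Eigen::half a(1.5f);    // explicit float -> half constructor, as above
      Eigen::half b(0.25f);
      Eigen::half c = a + b;  // arithmetic from half_impl (float fallback on host)
      std::cout << static_cast<float>(c) << "\n";  // prints 1.75
    }
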
/external/tensorflow/tensorflow/compiler/xla/tests/
half_test.cc
43 std::function<half(half)> compute_func;
51 std::vector<half> x({half(1.4), half(-2.3), half(3.2), half(-4.1), half(9.0), in XLA_TEST_P()
52 half(42.0), half(-9.0), half(-100.0)}); in XLA_TEST_P()
55 auto x_data = CreateR1Parameter<half>(x, /*parameter_number=*/0, "x", in XLA_TEST_P()
58 std::function<half(half)> compute_func = GetParam().compute_func; in XLA_TEST_P()
59 std::vector<half> expected; in XLA_TEST_P()
67 ComputeAndCompareR1<half>(&builder, expected, {x_data.get()}, error_spec_); in XLA_TEST_P()
70 half sign_imp(half value) { in sign_imp()
72 return half((x < .0) ? -1 : (x > .0)); in sign_imp()
75 half round_imp(half value) { in round_imp()
[all …]
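
The reference functions above compute in float and narrow back to half at the end. In sign_imp, (x < .0) ? -1 : (x > .0) yields -1 for negatives and otherwise the bool comparison converted to 0 or 1, so zero maps to 0. Restated in plain float:

    // sign_imp's ternary, spelled out: negatives -> -1, zero -> 0,
    // positives -> 1 (the bool (x > 0) converts to 0 or 1).
    float sign_ref(float x) {
      return (x < 0.0f) ? -1.0f : static_cast<float>(x > 0.0f);
    }
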
/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/
space_to_depth_test_util.cc
32 src_tensor.data = {half(1.0f), half(2.0f), half(3.0f), half(4.0f)}; in SpaceToDepthTensorShape1x2x2x1BlockSize2Test()
47 PointWiseNear({half(1.0f), half(2.0f), half(3.0f), half(4.0f)}, in SpaceToDepthTensorShape1x2x2x1BlockSize2Test()
58 src_tensor.data = {half(1.4f), half(2.3f), half(3.2f), half(4.1f), in SpaceToDepthTensorShape1x2x2x2BlockSize2Test()
59 half(5.4f), half(6.3f), half(7.2f), half(8.1f)}; in SpaceToDepthTensorShape1x2x2x2BlockSize2Test()
74 PointWiseNear({half(1.4f), half(2.3f), half(3.2f), half(4.1f), in SpaceToDepthTensorShape1x2x2x2BlockSize2Test()
75 half(5.4f), half(6.3f), half(7.2f), half(8.1f)}, in SpaceToDepthTensorShape1x2x2x2BlockSize2Test()
86 src_tensor.data = {half(1.0f), half(2.0f), half(3.0f), half(4.0f), in SpaceToDepthTensorShape1x2x2x3BlockSize2Test()
87 half(5.0f), half(6.0f), half(7.0f), half(8.0f), in SpaceToDepthTensorShape1x2x2x3BlockSize2Test()
88 half(9.0f), half(10.0f), half(11.0f), half(12.0f)}; in SpaceToDepthTensorShape1x2x2x3BlockSize2Test()
103 PointWiseNear({half(1.0f), half(2.0f), half(3.0f), half(4.0f), in SpaceToDepthTensorShape1x2x2x3BlockSize2Test()
[all …]
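
The expected outputs above are the usual SpaceToDepth rearrangement: each block_size x block_size spatial tile folds into the channel axis, HxWxC -> (H/b)x(W/b)x(C*b*b), with tile elements kept in row-major order. A hedged index-mapping sketch (float for brevity) that reproduces, e.g., the 1x2x2x1 case, where {1,2,3,4} becomes {1,2,3,4} in a 1x1x1x4 tensor:

    #include <vector>

    // SpaceToDepth on an HxWxC row-major buffer with block size b.
    std::vector<float> space_to_depth(const std::vector<float>& src,
                                      int h, int w, int c, int b) {
      std::vector<float> dst(src.size());
      for (int y = 0; y < h; ++y)
        for (int x = 0; x < w; ++x)
          for (int ch = 0; ch < c; ++ch) {
            int out_ch = (y % b) * b * c + (x % b) * c + ch;  // tile -> channel
            int out_idx = ((y / b) * (w / b) + x / b) * (c * b * b) + out_ch;
            dst[out_idx] = src[(y * w + x) * c + ch];
          }
      return dst;
    }
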
concat_test_util.cc
32 src0.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)}; in ConcatWidthTest()
34 src1.data = {half(1.0f), half(-1.2f), half(-0.45f), half(1.045f), in ConcatWidthTest()
35 half(1.1f), half(-1.3f), half(-0.55f), half(2.045f)}; in ConcatWidthTest()
54 PointWiseNear({half(0.0f), half(-1.0f), half(1.0f), half(-1.2f), in ConcatWidthTest()
55 half(-0.45f), half(1.045f), half(-0.05f), half(0.045f), in ConcatWidthTest()
56 half(1.1f), half(-1.3f), half(-0.55f), half(2.045f)}, in ConcatWidthTest()
66 src0.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)}; in ConcatHeightTest()
68 src1.data = {half(1.0f), half(-1.2f)}; in ConcatHeightTest()
86 RETURN_IF_ERROR(PointWiseNear({half(0.0f), half(-1.0f), half(-0.05f), in ConcatHeightTest()
87 half(0.045f), half(1.0f), half(-1.2f)}, in ConcatHeightTest()
[all …]
/external/llvm-project/llvm/test/CodeGen/AMDGPU/
llvm.amdgcn.fmad.ftz.f16.ll
5 declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
10 half addrspace(1)* %r,
11 half addrspace(1)* %a,
12 half addrspace(1)* %b,
13 half addrspace(1)* %c) {
14 %a.val = load half, half addrspace(1)* %a
15 %b.val = load half, half addrspace(1)* %b
16 %c.val = load half, half addrspace(1)* %c
17 %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
18 store half %r.val, half addrspace(1)* %r
[all …]
packed-op-sel.ll
14 …kernel void @fma_vector_vector_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %…
16 %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1
18 %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4
19 %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4
20 %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2
22 %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0
23 …%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitia…
25 …%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %sca…
26 store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4
42 …ma_vector_vector_neg_broadcast_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %…
[all …]
fcanonicalize.f16.ll
5 declare half @llvm.fabs.f16(half) #0
6 declare half @llvm.canonicalize.f16(half) #0
7 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
8 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
9 declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
10 declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
16 define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(half addrspace(1)* %out) #1 {
17 %canonicalized = call half @llvm.canonicalize.f16(half undef)
18 store half %canonicalized, half addrspace(1)* %out
28 define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
[all …]
v_mac_f16.ll
18 half addrspace(1)* %r,
19 half addrspace(1)* %a,
20 half addrspace(1)* %b,
21 half addrspace(1)* %c) #0 {
23 %a.val = load half, half addrspace(1)* %a
24 %b.val = load half, half addrspace(1)* %b
25 %c.val = load half, half addrspace(1)* %c
27 %t.val = fmul half %a.val, %b.val
28 %r.val = fadd half %t.val, %c.val
30 store half %r.val, half addrspace(1)* %r
[all …]
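
The kernel body above is the plain mul-then-add pattern the AMDGPU backend matches into v_mac_f16. A hedged sketch of the source-level computation (__fp16 extension again):

    // The separate multiply and add the test's kernel performs per element;
    // the backend contracts this into a single multiply-accumulate.
    __fp16 mul_add(__fp16 a, __fp16 b, __fp16 c) { return a * b + c; }
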
fneg-fabs.f16.ll
12 define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
13 %fabs = call half @llvm.fabs.f16(half %x)
14 %fsub = fsub half -0.0, %fabs
15 %fadd = fadd half %y, %fsub
16 store half %fadd, half addrspace(1)* %out, align 2
30 define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
31 %fabs = call half @llvm.fabs.f16(half %x)
32 %fsub = fsub half -0.0, %fabs
33 %fmul = fmul half %y, %fsub
34 store half %fmul, half addrspace(1)* %out, align 2
[all …]
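
fsub half -0.0, %x is IR's canonical fneg, so the fabs + fsub pair above computes -|x|, which on the binary16 bit pattern is just forcing the sign bit; the tests check that the backend folds the pair into a single bit operation or source modifier. On the raw bits:

    #include <cstdint>

    // fneg(fabs(x)) on binary16 bits: whatever the sign was, set it.
    uint16_t fneg_fabs_bits(uint16_t h) { return h | 0x8000u; }
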
pk_max_f16_literal.ll
6 define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
10 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
11 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
12 …%tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0…
13 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
19 define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
23 %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1
24 %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4
25 …%tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0…
26 store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4
[all …]
/external/llvm-project/llvm/test/CodeGen/ARM/
fp16-vminmaxnm-vector.ll
8 define <4 x half> @test1(<4 x half> %A, <4 x half> %B) {
12 %tmp3 = fcmp fast ogt <4 x half> %A, %B
13 %tmp4 = select <4 x i1> %tmp3, <4 x half> %A, <4 x half> %B
14 ret <4 x half> %tmp4
17 define <4 x half> @test2(<4 x half> %A, <4 x half> %B) {
21 %tmp3 = fcmp fast ogt <4 x half> %A, %B
22 %tmp4 = select <4 x i1> %tmp3, <4 x half> %B, <4 x half> %A
23 ret <4 x half> %tmp4
26 define <4 x half> @test3(<4 x half> %A, <4 x half> %B) {
30 %tmp3 = fcmp fast oge <4 x half> %A, %B
[all …]
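
The fast flag on the fcmp is what licenses vmaxnm/vminnm selection here: a plain ogt-plus-select and fmaxnm disagree when %B is NaN (the compare is false, so the select returns the NaN; maxNum returns the non-NaN operand). A hedged float illustration:

    #include <cmath>

    // Without fast-math these are different functions: pass b = NaN and
    // max_select returns the NaN, while max_nm (IEEE maxNum, like vmaxnm)
    // returns a.
    float max_select(float a, float b) { return a > b ? a : b; }
    float max_nm(float a, float b)     { return std::fmax(a, b); }
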
fp16-load-store.ll
3 define void @load_zero(half* %in, half* %out) {
7 %arrayidx = getelementptr inbounds half, half* %in, i32 0
8 %load = load half, half* %arrayidx, align 2
9 store half %load, half* %out
13 define void @load_255(half* %in, half* %out) {
17 %arrayidx = getelementptr inbounds half, half* %in, i32 255
18 %load = load half, half* %arrayidx, align 2
19 store half %load, half* %out
23 define void @load_256(half* %in, half* %out) {
28 %arrayidx = getelementptr inbounds half, half* %in, i32 256
[all …]
fp16-fullfp16.ll
4 define void @test_fadd(half* %p, half* %q) {
11 %a = load half, half* %p, align 2
12 %b = load half, half* %q, align 2
13 %r = fadd half %a, %b
14 store half %r, half* %p
18 define void @test_fsub(half* %p, half* %q) {
25 %a = load half, half* %p, align 2
26 %b = load half, half* %q, align 2
27 %r = fsub half %a, %b
28 store half %r, half* %p
[all …]
vsel-fp16.ll
4 @varhalf = global half 0.0
6 define void @test_vsel32sgt(i32 %lhs, i32 %rhs, half* %a_ptr, half* %b_ptr) {
17 %a = load volatile half, half* %a_ptr
18 %b = load volatile half, half* %b_ptr
20 %val1 = select i1 %tst1, half %a, half %b
21 store half %val1, half* @varhalf
25 define void @test_vsel32sge(i32 %lhs, i32 %rhs, half* %a_ptr, half* %b_ptr) {
36 %a = load volatile half, half* %a_ptr
37 %b = load volatile half, half* %b_ptr
39 %val1 = select i1 %tst1, half %a, half %b
[all …]
armv8.2a-fp16-vector-intrinsics.ll
3 %struct.float16x4x2_t = type { [2 x <4 x half>] }
4 %struct.float16x8x2_t = type { [2 x <8 x half>] }
6 define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
11 %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
12 ret <4 x half> %vabs1.i
15 define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
20 %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
21 ret <8 x half> %vabs1.i
24 define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
29 %0 = fcmp oeq <4 x half> %a, zeroinitializer
[all …]
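
test_vceqz_f16 compares each half lane ordered-equal against zero; the <4 x i1> result widens to <4 x i16> lane masks (all-ones for true), the usual NEON compare convention. A hedged scalar sketch:

    #include <cstdint>

    // Per-lane "equal to zero" producing a NEON-style all-ones/all-zeros mask.
    void vceqz4(const __fp16 a[4], uint16_t out[4]) {
      for (int i = 0; i < 4; ++i) out[i] = (a[i] == 0.0f) ? 0xFFFFu : 0u;
    }
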
fp16-vminmaxnm.ll
4 ; TODO: we can't pass half-precision arguments as "half" types yet. We do
7 ; is the shortest way to get a half type. But when we can pass half types, we
10 define half @fp16_vminnm_o(i16 signext %a, i16 signext %b) {
16 %0 = bitcast i16 %a to half
17 %1 = bitcast i16 %b to half
18 %cmp = fcmp fast olt half %0, %1
19 %cond = select i1 %cmp, half %0, half %1
20 ret half %cond
23 define half @fp16_vminnm_o_rev(i16 signext %a, i16 signext %b) {
29 %0 = bitcast i16 %a to half
[all …]
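
The TODO explains the bitcasts: the half values arrive as raw i16 bits because half arguments can't yet be passed directly, and the bitcast reinterprets rather than converts. The portable C++ spelling of that reinterpretation:

    #include <cstring>

    // Reinterpret an i16 bit pattern as binary16 -- a bitcast, not a value
    // conversion (matching %0 = bitcast i16 %a to half above).
    __fp16 bits_to_half(short bits) {
      __fp16 h;
      static_assert(sizeof(h) == sizeof(bits), "binary16 is 16 bits");
      std::memcpy(&h, &bits, sizeof(h));
      return h;
    }
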
/external/llvm/test/CodeGen/ARM/
fp16-promote.ll
17 define void @test_fadd(half* %p, half* %q) #0 {
18 %a = load half, half* %p, align 2
19 %b = load half, half* %q, align 2
20 %r = fadd half %a, %b
21 store half %r, half* %p
34 define void @test_fsub(half* %p, half* %q) #0 {
35 %a = load half, half* %p, align 2
36 %b = load half, half* %q, align 2
37 %r = fsub half %a, %b
38 store half %r, half* %p
[all …]
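
Without hardware fp16 arithmetic, the legalizer promotes: each half operand is extended to float, the operation happens in float, and the result is truncated back (on ARM via VCVTB/VCVTT or the __aeabi_h2f/__aeabi_f2h helpers). After promotion, test_fadd above is roughly:

    // What fp16-promote legalization makes of test_fadd: convert up,
    // add in float, convert back down.
    void test_fadd_promoted(__fp16 *p, __fp16 *q) {
      float a = static_cast<float>(*p);  // h2f
      float b = static_cast<float>(*q);  // h2f
      *p = static_cast<__fp16>(a + b);   // float fadd, then f2h
    }
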
