/external/llvm-project/llvm/test/CodeGen/AArch64/ |
D | aarch64-bf16-ldst-intrinsics.ll |
  %struct.bfloat16x4x2_t = type { [2 x <4 x bfloat>] }
  %struct.bfloat16x8x2_t = type { [2 x <8 x bfloat>] }
  %struct.bfloat16x4x3_t = type { [3 x <4 x bfloat>] }
  %struct.bfloat16x8x3_t = type { [3 x <8 x bfloat>] }
  %struct.bfloat16x4x4_t = type { [4 x <4 x bfloat>] }
  %struct.bfloat16x8x4_t = type { [4 x <8 x bfloat>] }
  define <4 x bfloat> @test_vld1_bf16(bfloat* nocapture readonly %ptr) local_unnamed_addr nounwind {
    %0 = bitcast bfloat* %ptr to <4 x bfloat>*
    %1 = load <4 x bfloat>, <4 x bfloat>* %0, align 2
    ret <4 x bfloat> %1
  [all …]
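The excerpt breaks off mid-function; for orientation, a complete single-function version of the same load pattern would look roughly like the sketch below (the RUN line and feature flags are my assumptions, not copied from the file):

  ; RUN: llc -mtriple=aarch64 -mattr=+neon,+bf16 < %s
  define <4 x bfloat> @load_v4bf16(bfloat* %ptr) {
    ; 64-bit vector load of four bf16 lanes; the bitcast just retypes the pointer.
    %p = bitcast bfloat* %ptr to <4 x bfloat>*
    %v = load <4 x bfloat>, <4 x bfloat>* %p, align 2
    ret <4 x bfloat> %v
  }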
|
D | bf16-vector-shuffle.ll |
  define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind {
    %0 = bitcast i64 %a to <4 x bfloat>
    ret <4 x bfloat> %0
  define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
    %vecinit.i = insertelement <4 x bfloat> undef, bfloat %v, i32 0
    %vecinit3.i = shufflevector <4 x bfloat> %vecinit.i, <4 x bfloat> undef, <4 x i32> zeroinitializer
    ret <4 x bfloat> %vecinit3.i
  define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
    %vecinit.i = insertelement <8 x bfloat> undef, bfloat %v, i32 0
    %vecinit7.i = shufflevector <8 x bfloat> %vecinit.i, <8 x bfloat> undef, <8 x i32> zeroinitializer
  [all …]
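The excerpt cuts off before the final return of test_vdupq_n_bf16; completed, the insertelement-plus-shufflevector splat idiom reads as follows (the closing lines are mine):

  define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) {
    ; Insert the scalar into lane 0, then broadcast lane 0 to all
    ; eight lanes with an all-zero shuffle mask.
    %vecinit.i = insertelement <8 x bfloat> undef, bfloat %v, i32 0
    %vecinit7.i = shufflevector <8 x bfloat> %vecinit.i, <8 x bfloat> undef, <8 x i32> zeroinitializer
    ret <8 x bfloat> %vecinit7.i
  }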
|
D | aarch64-bf16-dotprod-intrinsics.ll |
  define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
    …<2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b)
  define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    …<4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b)
  define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
    %.cast = bitcast <4 x bfloat> %b to <2 x float>
    %.cast1 = bitcast <2 x float> %lane to <4 x bfloat>
    …float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %.cast1)
  define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    %.cast = bitcast <8 x bfloat> %b to <4 x float>
  [all …]
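Reassembled from the truncated call lines, the basic bfdot test reduces to one intrinsic call; a minimal sketch using only the signature visible above (the function name is mine). The lane variants select the bf16 pair by bitcasting to <2 x float>, shuffling, and bitcasting back, exactly as the %.cast lines show:

  declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float>, <4 x bfloat>, <4 x bfloat>)

  ; Each f32 result lane is %r plus the dot product of one bf16 pair.
  define <2 x float> @bfdot_sketch(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
    %d = call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b)
    ret <2 x float> %d
  }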
|
D | bf16-vector-bitcast.ll |
  define <4 x i16> @v4bf16_to_v4i16(float, <4 x bfloat> %a) nounwind {
    %1 = bitcast <4 x bfloat> %a to <4 x i16>
  define <2 x i32> @v4bf16_to_v2i32(float, <4 x bfloat> %a) nounwind {
    %1 = bitcast <4 x bfloat> %a to <2 x i32>
  define <1 x i64> @v4bf16_to_v1i64(float, <4 x bfloat> %a) nounwind {
    %1 = bitcast <4 x bfloat> %a to <1 x i64>
  define i64 @v4bf16_to_i64(float, <4 x bfloat> %a) nounwind {
    %1 = bitcast <4 x bfloat> %a to i64
  define <2 x float> @v4bf16_to_v2float(float, <4 x bfloat> %a) nounwind {
    %1 = bitcast <4 x bfloat> %a to <2 x float>
  [all …]
|
D | sve-intrinsics-bfloat.ll |
  … x float> @bfdot_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) …
  …llvm.aarch64.sve.bfdot(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
  …t> @bfdot_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) …
  …rch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, …
  …t> @bfdot_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) …
  …rch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, …
  …t> @bfdot_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) …
  …rch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, …
  …t> @bfdot_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) …
  …rch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, …
  [all …]
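The SVE variant follows the same shape with scalable types; a sketch assembled from the visible signature (the .lane variants add a trailing lane-index immediate, whose type is cut off above; the target-features attribute is an assumption):

  declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

  ; Unpredicated BFDOT on scalable vectors.
  define <vscale x 4 x float> @sve_bfdot_sketch(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) #0 {
    %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
    ret <vscale x 4 x float> %out
  }

  attributes #0 = { "target-features"="+sve,+bf16" }  ; assumed feature guard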
|
D | bf16.ll |
  define bfloat @test_load(bfloat* %p) nounwind {
    %tmp1 = load bfloat, bfloat* %p, align 16
    ret bfloat %tmp1
  define <4 x bfloat> @test_vec_load(<4 x bfloat>* %p) nounwind {
    %tmp1 = load <4 x bfloat>, <4 x bfloat>* %p, align 16
    ret <4 x bfloat> %tmp1
  define void @test_store(bfloat* %a, bfloat %b) nounwind {
    store bfloat %b, bfloat* %a, align 16
  define void @test_vec_store(<4 x bfloat>* %a, <4 x bfloat> %b) nounwind {
    store <4 x bfloat> %b, <4 x bfloat>* %a, align 16
|
D | bf16-convert-intrinsics.ll |
  declare bfloat @llvm.aarch64.neon.bfcvt(float)
  declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float>)
  declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat>, <4 x float>)
  define bfloat @test_vcvth_bf16_f32(float %a) {
    %vcvth_bf16_f32 = call bfloat @llvm.aarch64.neon.bfcvt(float %a)
    ret bfloat %vcvth_bf16_f32
  define <8 x bfloat> @test_vcvtq_low_bf16_f32(<4 x float> %a) {
    %cvt = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a)
    ret <8 x bfloat> %cvt
  define <8 x bfloat> @test_vcvtq_high_bf16_f32(<4 x float> %a, <8 x bfloat> %inactive) {
  [all …]
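Using the three declares above, the low/high narrowing pair composes like this (a sketch; the function name is mine):

  declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float>)
  declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat>, <4 x float>)

  ; Narrow eight f32 to eight bf16: bfcvtn fills the low half of the result,
  ; bfcvtn2 converts %hi into the high half while preserving the low half.
  define <8 x bfloat> @narrow_v8f32(<4 x float> %lo, <4 x float> %hi) {
    %low = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %lo)
    %all = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat> %low, <4 x float> %hi)
    ret <8 x bfloat> %all
  }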
|
D | sve-intrinsics-scalar-to-vec.ll |
  define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) …
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
      bfloat %b)
    ret <vscale x 8 x bfloat> %out
  define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
    …t = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitialize…
    ret <vscale x 8 x bfloat> %out
  define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1…
    …%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, …
    ret <vscale x 8 x bfloat> %out
  [all …]
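The dup call is split across excerpt lines and its middle operand is cut out; this sketch assumes the governing predicate sits between the inactive vector and the scalar, as the surrounding SVE tests suggest:

  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)

  ; Merging form: active lanes take %b, inactive lanes keep %inactive.
  define <vscale x 8 x bfloat> @dup_merge(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %b) {
    %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %b)
    ret <vscale x 8 x bfloat> %r
  }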
|
D | sve-intrinsics-perm-select-matmul-fp64.ll |
  define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwin…
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat> %a,
      <vscale x 8 x bfloat> %b)
    ret <vscale x 8 x bfloat> %out
  define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwin…
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat> %a,
      <vscale x 8 x bfloat> %b)
    ret <vscale x 8 x bfloat> %out
  define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwin…
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat> %a,
  [all …]
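Joined back together, each permute is a single two-operand call; a sketch (these quadword-granule permutes belong to the F64MM extension, which is presumably why the file name says matmul-fp64, though that is my inference):

  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

  ; TRN1 with 128-bit (quadword) granules rather than element granules.
  define <vscale x 8 x bfloat> @trn1q_sketch(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
    %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
    ret <vscale x 8 x bfloat> %r
  }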
|
D | sve-bitcast.ll |
  define <vscale x 16 x i8> @bitcast_bfloat_to_i8(<vscale x 8 x bfloat> %v) #0 {
    %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
  define <vscale x 8 x i16> @bitcast_bfloat_to_i16(<vscale x 8 x bfloat> %v) #0 {
    %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
  define <vscale x 4 x i32> @bitcast_bfloat_to_i32(<vscale x 8 x bfloat> %v) #0 {
    %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
  define <vscale x 2 x i64> @bitcast_bfloat_to_i64(<vscale x 8 x bfloat> %v) #0 {
    %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
  define <vscale x 8 x half> @bitcast_bfloat_to_half(<vscale x 8 x bfloat> %v) #0 {
    %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
  [all …]
|
D | sve-intrinsics-create-tuple.ll |
  ; SVCREATE2 (bfloat)
  define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x …
    …il call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bf…
    ret <vscale x 8 x bfloat> undef
    …%extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale …
    ret <vscale x 8 x bfloat> %extract
  define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x …
    …il call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bf…
    ret <vscale x 8 x bfloat> undef
    …%extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale …
  [all …]
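The create/get pair is the most heavily truncated excerpt here; this round-trip sketch assumes the conventional shapes, create2 taking the two halves and get taking the tuple plus an i32 index, neither of which is fully visible above:

  declare <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat>, i32)

  ; Pack two nxv8bf16 vectors into a tuple, then pull out element 1
  ; (index chosen for illustration; the signatures above are assumptions).
  define <vscale x 8 x bfloat> @tuple_roundtrip(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) {
    %t = call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
    %e = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %t, i32 1)
    ret <vscale x 8 x bfloat> %e
  }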
|
D | sve-intrinsics-ldN-reg+reg-addr-mode.ll |
  define <vscale x 16 x bfloat> @ld2.nxv16bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
    %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a
    %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> …
    ret <vscale x 16 x bfloat> %res
  define <vscale x 24 x bfloat> @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
    %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a
    %res = call <vscale x 24 x bfloat> @llvm.aarch64.sve.ld3.nxv24bf16.nxv8i1.p0bf16(<vscale x 8 x i1> …
    ret <vscale x 24 x bfloat> %res
  define <vscale x 32 x bfloat> @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, bfloat *%addr, i64 %a) #0 {
    %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a
  [all …]
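Assembled, the reg+reg form is a GEP feeding the structured-load intrinsic; a sketch from the visible ld2 signature (the function name is mine):

  declare <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)

  ; The GEP supplies the reg+reg address; ld2 returns both destination
  ; registers concatenated as a single nxv16bf16 value.
  define <vscale x 16 x bfloat> @ld2_sketch(<vscale x 8 x i1> %pg, bfloat* %addr, i64 %i) {
    %base = getelementptr bfloat, bfloat* %addr, i64 %i
    %res = call <vscale x 16 x bfloat> @llvm.aarch64.sve.ld2.nxv16bf16.nxv8i1.p0bf16(<vscale x 8 x i1> %pg, bfloat* %base)
    ret <vscale x 16 x bfloat> %res
  }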
|
D | sve-intrinsics-loads.ll |
  define <vscale x 8 x bfloat> @ld1rqh_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
    …%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat…
    ret <vscale x 8 x bfloat> %res
  define <vscale x 8 x bfloat> @ld1rqh_bf16_imm(<vscale x 8 x i1> %pred, bfloat* %addr) {
    %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
    …%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pred, bfloat…
    ret <vscale x 8 x bfloat> %res
  define <vscale x 8 x bfloat> @ldnt1h_bf16(<vscale x 8 x i1> %pred, bfloat* %addr) {
    %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %pred,
      bfloat* %addr)
  [all …]
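The ld1rq calls are cut off after the predicate; this sketch assumes the second operand is the bfloat pointer, matching the declare style used elsewhere in these tests:

  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1>, bfloat*)

  ; LD1RQH loads one 128-bit quadword and replicates it across the whole
  ; vector; the -8 element offset should target the signed-immediate form.
  define <vscale x 8 x bfloat> @ld1rq_imm_sketch(<vscale x 8 x i1> %pg, bfloat* %addr) {
    %ptr = getelementptr inbounds bfloat, bfloat* %addr, i16 -8
    %r = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %ptr)
    ret <vscale x 8 x bfloat> %r
  }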
|
D | sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll |
  ; bfloat - requires -mattr=+bf16
  define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) nounwind {
    %base = getelementptr bfloat, bfloat* %a, i64 %index
    …%load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %pg, bfloat*…
    ret <vscale x 8 x bfloat> %load
  declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1>, bfloat*)
|
D | sve-intrinsics-perm-select.ll |
  define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale …
    %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
      <vscale x 8 x bfloat> %a,
      <vscale x 8 x bfloat> %b)
    ret <vscale x 8 x bfloat> %out
  define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
    %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
      bfloat %a,
      <vscale x 8 x bfloat> %b)
    ret bfloat %out
  [all …]
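The clasta.n signature is fully visible across the split lines; joined into a standalone sketch:

  declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)

  ; CLASTA (scalar form): returns the element after the last active lane
  ; of %b, falling back to %a when no lane is active.
  define bfloat @clasta_n_sketch(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) {
    %r = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
    ret bfloat %r
  }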
|
/external/llvm-project/llvm/test/CodeGen/ARM/ |
D | bf16-intrinsics-ld-st.ll |
  ; FIXME: Remove fullfp16 once bfloat arguments and returns lowering stops
  define arm_aapcs_vfpcc <4 x bfloat> @test_vld1_bf16(bfloat* nocapture readonly %ptr) {
    %0 = bitcast bfloat* %ptr to <4 x bfloat>*
    %1 = load <4 x bfloat>, <4 x bfloat>* %0, align 2
    ret <4 x bfloat> %1
  define arm_aapcs_vfpcc <8 x bfloat> @test_vld1q_bf16(bfloat* nocapture readonly %ptr) {
    %0 = bitcast bfloat* %ptr to <8 x bfloat>*
    %1 = load <8 x bfloat>, <8 x bfloat>* %0, align 2
    ret <8 x bfloat> %1
  define arm_aapcs_vfpcc <4 x bfloat> @test_vld1_lane_bf16(bfloat* nocapture readonly %ptr, <4 x bflo…
  [all …]
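A standalone ARM version of the simplest load, under the assumption (per the FIXME) that +fullfp16 is still needed alongside +bf16; the triple and RUN line are mine:

  ; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+neon,+bf16,+fullfp16 < %s
  define arm_aapcs_vfpcc <8 x bfloat> @load_v8bf16(bfloat* %ptr) {
    ; 128-bit vector load of eight bf16 lanes, hard-float calling convention.
    %p = bitcast bfloat* %ptr to <8 x bfloat>*
    %v = load <8 x bfloat>, <8 x bfloat>* %p, align 2
    ret <8 x bfloat> %v
  }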
|
D | bf16-create-get-set-dup.ll |
  ; FIXME: Remove fullfp16 once bfloat arguments and returns lowering stops
  define arm_aapcs_vfpcc <4 x bfloat> @test_vcreate_bf16(i64 %a) {
    %0 = bitcast i64 %a to <4 x bfloat>
    ret <4 x bfloat> %0
  define arm_aapcs_vfpcc <4 x bfloat> @test_vdup_n_bf16(bfloat %v) {
    %vecinit.i = insertelement <4 x bfloat> undef, bfloat %v, i32 0
    %vecinit3.i = shufflevector <4 x bfloat> %vecinit.i, <4 x bfloat> undef, <4 x i32> zeroinitializer
    ret <4 x bfloat> %vecinit3.i
  define arm_aapcs_vfpcc <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) {
    %vecinit.i = insertelement <8 x bfloat> undef, bfloat %v, i32 0
  [all …]
|
D | arm-bf16-dotprod-intrinsics.ll |
  define arm_aapcs_vfpcc <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %…
    … <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) #3
  define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    … <4 x float> @llvm.arm.neon.bfdot.v4f32.v8bf16(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) #3
  define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
    %.cast = bitcast <4 x bfloat> %b to <2 x float>
    %.cast1 = bitcast <2 x float> %lane to <4 x bfloat>
    … x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %.cast1)…
  define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    %.cast = bitcast <8 x bfloat> %b to <4 x float>
  [all …]
|
D | bfloat.ll |
  define bfloat @load_scalar_bf(bfloat* %addr) {
    %0 = load bfloat, bfloat* %addr, align 2
    ret bfloat %0
  define void @store_scalar_bf(bfloat %v, bfloat* %addr) {
    store bfloat %v, bfloat* %addr, align 2
  define <4 x bfloat> @load_vector4_bf(<4 x bfloat>* %addr) {
    %0 = load <4 x bfloat>, <4 x bfloat>* %addr, align 8
    ret <4 x bfloat> %0
  define void @store_vector4_bf(<4 x bfloat> %v, <4 x bfloat>* %addr) {
    store <4 x bfloat> %v, <4 x bfloat>* %addr, align 8
  [all …]
|
D | bf16-getlane-with-fp16.ll |
  define arm_aapcs_vfpcc bfloat @test_vgetq_lane_bf16_even(<8 x bfloat> %v) {
    %0 = extractelement <8 x bfloat> %v, i32 6
    ret bfloat %0
  define arm_aapcs_vfpcc bfloat @test_vgetq_lane_bf16_odd(<8 x bfloat> %v) {
    %0 = extractelement <8 x bfloat> %v, i32 7
    ret bfloat %0
  define arm_aapcs_vfpcc bfloat @test_vget_lane_bf16_even(<4 x bfloat> %v) {
    %0 = extractelement <4 x bfloat> %v, i32 2
    ret bfloat %0
  define arm_aapcs_vfpcc bfloat @test_vget_lane_bf16_odd(<4 x bfloat> %v) {
  [all …]
|
D | arm-bf16-pcs.ll |
  define bfloat @bf_load_soft(bfloat* %p) {
    %f = load bfloat, bfloat* %p, align 2
    ret bfloat %f
  define arm_aapcs_vfpcc bfloat @bf_load_hard(bfloat* %p) {
    %f = load bfloat, bfloat* %p, align 2
    ret bfloat %f
  define void @bf_store_soft(bfloat* %p, bfloat %f) {
    store bfloat %f, bfloat* %p, align 2
  define arm_aapcs_vfpcc void @bf_store_hard(bfloat* %p, bfloat %f) {
    store bfloat %f, bfloat* %p, align 2
  [all …]
|
D | bf16-convert-intrinsics.ll |
  declare bfloat @llvm.arm.neon.vcvtbfp2bf(float)
  declare <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float>)
  define arm_aapcs_vfpcc <4 x bfloat> @test_vcvt_bf16_f32_hardfp(<4 x float> %a) {
    %vcvtfp2bf1.i.i = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> %a)
    ret <4 x bfloat> %vcvtfp2bf1.i.i
  define arm_aapcs_vfpcc bfloat @test_vcvth_bf16_f32_hardfp(float %a) {
    %vcvtbfp2bf.i = call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a)
    ret bfloat %vcvtbfp2bf.i
  define bfloat @test_vcvth_bf16_f32_softfp(float %a) #1 {
    %vcvtbfp2bf.i = call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a) #3
  [all …]
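Both declares are given in full, so a minimal caller is straightforward (the function name is mine; the scalar form should select the VCVTB.BF16.F32 instruction, though that mapping is my reading, not shown above):

  declare bfloat @llvm.arm.neon.vcvtbfp2bf(float)

  define arm_aapcs_vfpcc bfloat @cvt_scalar(float %a) {
    ; Round one f32 down to bf16.
    %b = call bfloat @llvm.arm.neon.vcvtbfp2bf(float %a)
    ret bfloat %b
  }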
|
/external/llvm-project/llvm/test/Assembler/ |
D | bfloat.ll |
  ; Basic smoke tests for bfloat type.
  define bfloat @check_bfloat(bfloat %A) {
  ; ASSEM-DISASS: ret bfloat %A
    ret bfloat %A
  define bfloat @check_bfloat_literal() {
  ; ASSEM-DISASS: ret bfloat 0xR3149
    ret bfloat 0xR3149
  define <4 x bfloat> @check_fixed_vector() {
  ; ASSEM-DISASS: ret <4 x bfloat> %tmp
    %tmp = fadd <4 x bfloat> undef, undef
  [all …]
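The 0xR prefix seen in check_bfloat_literal is LLVM's hexadecimal spelling for bfloat immediates: four hex digits holding the raw 16-bit pattern. For instance, 1.0 is sign 0, exponent 0x7F, mantissa 0:

  define bfloat @one() {
    ; 0x3F80 = 0 01111111 0000000, i.e. bfloat 1.0
    ret bfloat 0xR3F80
  }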
|
/external/llvm-project/llvm/test/Bitcode/ |
D | arm-bf16-upgrade.ll |
  define arm_aapcs_vfpcc <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %…
    %0 = bitcast <4 x bfloat> %a to <8 x i8>
    %1 = bitcast <4 x bfloat> %b to <8 x i8>
  ; CHECK: %2 = bitcast <8 x i8> %0 to <4 x bfloat>
  ; CHECK-NEXT: %3 = bitcast <8 x i8> %1 to <4 x bfloat>
    …all <2 x float> @llvm.arm.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %2, <4 x bfloat> %3)
  define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    %0 = bitcast <8 x bfloat> %a to <16 x i8>
    %1 = bitcast <8 x bfloat> %b to <16 x i8>
  ; CHECK: %2 = bitcast <16 x i8> %0 to <8 x bfloat>
  [all …]
|
D | aarch64-bf16-upgrade.ll |
  define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
    %0 = bitcast <4 x bfloat> %a to <8 x i8>
    %1 = bitcast <4 x bfloat> %b to <8 x i8>
  ; CHECK: %2 = bitcast <8 x i8> %0 to <4 x bfloat>
  ; CHECK-NEXT: %3 = bitcast <8 x i8> %1 to <4 x bfloat>
    …<2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(<2 x float> %r, <4 x bfloat> %2, <4 x bfloat> %3)
  define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
    %0 = bitcast <8 x bfloat> %a to <16 x i8>
    %1 = bitcast <8 x bfloat> %b to <16 x i8>
  ; CHECK: %2 = bitcast <16 x i8> %0 to <8 x bfloat>
  [all …]
|