/external/llvm/test/CodeGen/X86/ |
D | avx512-intrinsics-upgrade.ll | 4 declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) noun… 17 …%res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> z… 18 …%res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> … 19 …%res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> … 25 declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) noun… 38 …%res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> … 39 …%res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double>… 40 …%res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double>… 46 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16) 58 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1) [all …]
|
D | avx512dq-intrinsics.ll | 4 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32) 14 …%res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, … 15 …%res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, … 20 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32) 30 …%res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2,… 31 …%res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1,… 36 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32) 46 …%res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i… 47 …%res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i… 52 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32) [all …]
|
D | avx512cdvl-intrinsics.ll | 6 declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8) 18 %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 19 %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 20 …%res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer,… 26 declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8) 36 %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 37 %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) 42 declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8) 52 %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 53 %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) [all …]
|
D | avx512-gather-scatter-intrin.ll | 4 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32) 5 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32) 6 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32) 7 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32) 9 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32) 10 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32) 11 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32) 12 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32) 23 …%x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%i… 25 …call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %… [all …]
|
D | avx512-intrinsics.ll | 4 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone 14 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) 18 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone 28 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) 32 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone 44 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) 45 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) 49 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone 57 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) 61 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone [all …]
|
D | avx512er-intrinsics.ll | 5 …%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitialize… 12 …%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i3… 18 …%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, … 25 …%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitialize… 31 …%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, … 36 declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind rea… 40 …%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer,… 43 declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readn… 47 …%res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer,… 50 declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readno… [all …]
|
D | avx512bwvl-intrinsics-upgrade.ll | 4 declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) 16 %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) 17 %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) 18 …%res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, … 24 declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16) 36 %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 37 %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) 38 …%res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, … 44 declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16) 56 %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) [all …]
|
D | avx512ifmavl-intrinsics.ll | 4 declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 22 …%res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i6… 23 …%res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i… 24 …%res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64>… 25 …%res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i… 32 declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 50 …%res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i6… 51 …%res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i… 52 …%res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64>… 53 …%res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i… [all …]
|
D | avx512vl-intrinsics.ll | 34 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 36 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 38 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 40 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 42 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 44 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 46 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 48 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 82 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 84 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) [all …]
|
D | avx512-fma-intrinsics.ll | 4 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, … 5 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, … 12 …%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, … 15 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>,… 23 …%res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, … 32 …%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, … 35 declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>,… 43 …%res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, … 52 …%res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, … 55 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>,… [all …]
|
D | avx512-scalarIntrinsics.ll | 10 …%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> … 13 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind… 20 …%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> ze… 23 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind r… 30 …%res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x doub… 33 declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) noun… 40 …%res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double… 44 declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwi… 46 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) 51 …%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x flo… [all …]
|
D | avx512vl-intrinsics-upgrade.ll | 4 declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) 17 %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1) 18 %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) 19 …%res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, … 25 declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8) 37 %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 38 %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) 39 …%res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, … 45 declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8) 57 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) [all …]
|
D | avx512ifma-intrinsics.ll | 3 declare <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 19 …%res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i6… 20 …%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i… 21 …%res2 = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64>… 22 …%res3 = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i… 29 declare <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 45 …%res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i… 46 …%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x … 47 …%res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> zeroinitializer, <8 x i64… 48 …%res3 = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x … [all …]
|
D | avx512dqvl-intrinsics.ll | 9 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 20 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %pas… 30 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 40 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 52 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %pas… 63 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 75 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 89 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %pas… 102 …%res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zero… 105 declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) [all …]
|
D | avx512vbmi-intrinsics.ll | 3 declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 15 …%res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>… 16 …%res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8… 17 …%res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8… 23 declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 32 …%res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x… 33 …%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 … 34 …%res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 … 40 declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 54 …%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x … [all …]
|
D | avx512bw-intrinsics.ll | 72 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 75 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 77 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 79 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 81 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 83 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 85 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 161 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 162 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) [all …]
|
D | avx512bwvl-intrinsics.ll | 35 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 37 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 39 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 41 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 43 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 45 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 47 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 49 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 84 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 86 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) [all …]
|
D | vector-shuffle-combining-avx512bw.ll | 4 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 6 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>… 8 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) 9 declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 10 declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 11 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 13 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>… 14 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float… 16 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 17 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) [all …]
|
D | avx512vbmivl-intrinsics.ll | 3 declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 15 …%res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>… 16 …%res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8… 17 …%res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8… 23 declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 35 …%res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>… 36 …%res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8… 37 …%res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8… 43 declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 55 …%res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x… [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-avx512skx-mul32-ld128-x16.c | 25 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 26 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 27 …const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx512.output_zero_p… in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 28 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx512.output_min); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 29 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx512.output_max); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 32 _mm512_broadcastd_epi32(_mm_cvtsi32_si128(params->avx512.b_multiplier[0] * (int32_t) *input_b)), in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 33 _mm512_load_si512(params->avx512.bias)); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16()
|
D | minmax-avx512skx-mul32-ld128-x32.c | 25 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 26 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 27 const __m512i voutput_zero_point = _mm512_load_si512(params->avx512.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 28 const __m256i voutput_min = _mm256_load_si256((const __m256i*) params->avx512.output_min); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 29 const __m256i voutput_max = _mm256_load_si256((const __m256i*) params->avx512.output_max); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 32 _mm512_broadcastd_epi32(_mm_cvtsi32_si128(params->avx512.b_multiplier[0] * (int32_t) *input_b)), in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 33 _mm512_load_si512(params->avx512.bias)); in xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32()
|
/external/XNNPACK/src/qu8-vaddc/gen/ |
D | minmax-avx512skx-mul32-ld128-x16.c | 25 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 26 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 27 …const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx512.output_zero_p… in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 28 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx512.output_min); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 29 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx512.output_max); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 32 _mm512_broadcastd_epi32(_mm_cvtsi32_si128(params->avx512.b_multiplier[0] * (int32_t) *input_b)), in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16() 33 _mm512_load_si512(params->avx512.bias)); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x16()
|
D | minmax-avx512skx-mul32-ld128-x32.c | 25 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 26 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 27 const __m512i voutput_zero_point = _mm512_load_si512(params->avx512.output_zero_point); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 28 const __m256i voutput_min = _mm256_load_si256((const __m256i*) params->avx512.output_min); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 29 const __m256i voutput_max = _mm256_load_si256((const __m256i*) params->avx512.output_max); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 32 _mm512_broadcastd_epi32(_mm_cvtsi32_si128(params->avx512.b_multiplier[0] * (int32_t) *input_b)), in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32() 33 _mm512_load_si512(params->avx512.bias)); in xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_x32()
|
/external/XNNPACK/src/qu8-vadd/gen/ |
D | minmax-avx512skx-mul32-ld128-x16.c | 25 const __m512i vbias = _mm512_load_si512(params->avx512.bias); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 26 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 27 const __m512i vb_multiplier = _mm512_load_si512(params->avx512.b_multiplier); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 28 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 29 …const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx512.output_zero_p… in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx512.output_min); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx512.output_max); in xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx512skx-mul32-ld128-x16.c | 25 const __m512i vbias = _mm512_load_si512(params->avx512.bias); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 26 const __m512i va_multiplier = _mm512_load_si512(params->avx512.a_multiplier); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 27 const __m512i vb_multiplier = _mm512_load_si512(params->avx512.b_multiplier); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 28 const __m128i vshift = _mm_load_si128((const __m128i*) params->avx512.shift); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 29 …const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx512.output_zero_p… in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx512.output_min); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16() 31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx512.output_max); in xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_x16()
|