; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
define i32 @test_kortestz(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kortestw %k0, %k1
; CHECK-NEXT: sete %al
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
define i32 @test_kortestc(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestc:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kortestw %k0, %k1
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kand:
; CHECK: ## BB#0:
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kandw %k0, %k1, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kunpckbw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double>%res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float>%res
}

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vgetexppd %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_round_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vgetexpps %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_round_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <4 x float> %res0, %res1
  %res.2 = fadd <4 x float> %res2, %res3
  %res = fadd <4 x float> %res.1, %res.2
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <2 x double> %res0, %res1
  %res.2 = fadd <2 x double> %res2, %res3
  %res = fadd <2 x double> %res.1, %res.2
  ret <2 x double> %res
}

define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2si %xmm0, %rax
; CHECK-NEXT: retq
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone



define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone

define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2si %xmm0, %rax
; CHECK-NEXT: retq
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse_cvtsi642ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone


define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
  %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2si %xmm0, %rcx
; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
; CHECK-NEXT: vcvttss2usi %xmm0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: retq
  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
; CHECK-NEXT: retq
  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone

define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly


define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
; CHECK-LABEL: test_x86_vcvtps2ph_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm0
; CHECK-NEXT: retq
  %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
; CHECK-LABEL: test_x86_vbroadcast_ss_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
; CHECK-LABEL: test_x86_vbroadcast_sd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %edi, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly

define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK-LABEL: test_conflict_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpconflictd %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_conflict_q(<8 x i64> %a) {
; CHECK-LABEL: test_conflict_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpconflictq %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
; CHECK-LABEL: test_maskz_conflict_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_conflict_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
; CHECK-LABEL: test_lzcnt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vplzcntd %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
; CHECK-LABEL: test_lzcnt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vplzcntq %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly


define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_lzcnt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_lzcnt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_mask_blend_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_mask_blend_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %b = load <8 x double>, <8 x double>* %ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
; CHECK-LABEL: test_x86_mask_blend_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: test_x86_mask_blend_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
   %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
   ret i16 %res
 }
 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)

 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
   %res
= call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) 763 ret i8 %res 764 } 765 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) 766 767 ; fp min - max 768define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 769; CHECK-LABEL: test_vmaxpd: 770; CHECK: ## BB#0: 771; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 772; CHECK-NEXT: retq 773 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, 774 <8 x double>zeroinitializer, i8 -1, i32 4) 775 ret <8 x double> %res 776} 777declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, 778 <8 x double>, i8, i32) 779 780define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 781; CHECK-LABEL: test_vminpd: 782; CHECK: ## BB#0: 783; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0 784; CHECK-NEXT: retq 785 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, 786 <8 x double>zeroinitializer, i8 -1, i32 4) 787 ret <8 x double> %res 788} 789declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, 790 <8 x double>, i8, i32) 791 792 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) 793 794define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { 795; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512: 796; CHECK: ## BB#0: 797; CHECK-NEXT: kmovw %edi, %k1 798; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1} 799; CHECK-NEXT: vpabsd %zmm0, %zmm0 800; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 801; CHECK-NEXT: retq 802 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) 803 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) 804 %res2 = add <16 x i32> %res, %res1 805 ret <16 x i32> %res2 806} 807 808declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, 
i8) 809 810define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { 811; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512: 812; CHECK: ## BB#0: 813; CHECK-NEXT: movzbl %dil, %eax 814; CHECK-NEXT: kmovw %eax, %k1 815; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1} 816; CHECK-NEXT: vpabsq %zmm0, %zmm0 817; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 818; CHECK-NEXT: retq 819 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) 820 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) 821 %res2 = add <8 x i64> %res, %res1 822 ret <8 x i64> %res2 823} 824 825define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { 826; CHECK-LABEL: test_vptestmq: 827; CHECK: ## BB#0: 828; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 829; CHECK-NEXT: kmovw %k0, %eax 830; CHECK-NEXT: retq 831 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) 832 ret i8 %res 833} 834declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8) 835 836define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) { 837; CHECK-LABEL: test_vptestmd: 838; CHECK: ## BB#0: 839; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 840; CHECK-NEXT: kmovw %k0, %eax 841; CHECK-NEXT: retq 842 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) 843 ret i16 %res 844} 845declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16) 846 847define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) { 848; CHECK-LABEL: test_store1: 849; CHECK: ## BB#0: 850; CHECK-NEXT: kmovw %esi, %k1 851; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1} 852; CHECK-NEXT: retq 853 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 854 ret void 855} 856 857declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) 858 859define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) { 860; CHECK-LABEL: test_store2: 
861; CHECK: ## BB#0: 862; CHECK-NEXT: kmovw %esi, %k1 863; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1} 864; CHECK-NEXT: retq 865 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 866 ret void 867} 868 869declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8) 870 871define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 872; CHECK-LABEL: test_mask_store_aligned_ps: 873; CHECK: ## BB#0: 874; CHECK-NEXT: kmovw %esi, %k1 875; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} 876; CHECK-NEXT: retq 877 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 878 ret void 879} 880 881declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 ) 882 883define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 884; CHECK-LABEL: test_mask_store_aligned_pd: 885; CHECK: ## BB#0: 886; CHECK-NEXT: kmovw %esi, %k1 887; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1} 888; CHECK-NEXT: retq 889 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 890 ret void 891} 892 893declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8) 894 895define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 896; CHECK-LABEL: test_maskz_load_aligned_ps: 897; CHECK: ## BB#0: 898; CHECK-NEXT: kmovw %esi, %k1 899; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} 900; CHECK-NEXT: retq 901 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask) 902 ret <16 x float> %res 903} 904 905declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16) 906 907define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 908; CHECK-LABEL: test_maskz_load_aligned_pd: 909; CHECK: ## BB#0: 910; CHECK-NEXT: kmovw %esi, %k1 911; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} 912; CHECK-NEXT: retq 913 %res = call <8 x double> 
@llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask) 914 ret <8 x double> %res 915} 916 917declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8) 918 919define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 920; CHECK-LABEL: test_load_aligned_ps: 921; CHECK: ## BB#0: 922; CHECK-NEXT: vmovaps (%rdi), %zmm0 923; CHECK-NEXT: retq 924 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1) 925 ret <16 x float> %res 926} 927 928define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 929; CHECK-LABEL: test_load_aligned_pd: 930; CHECK: ## BB#0: 931; CHECK-NEXT: vmovapd (%rdi), %zmm0 932; CHECK-NEXT: retq 933 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1) 934 ret <8 x double> %res 935} 936 937declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) 938 939define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { 940; CHECK-LABEL: test_valign_q: 941; CHECK: ## BB#0: 942; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 943; CHECK-NEXT: retq 944 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1) 945 ret <8 x i64> %res 946} 947 948define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { 949; CHECK-LABEL: test_mask_valign_q: 950; CHECK: ## BB#0: 951; CHECK-NEXT: movzbl %dil, %eax 952; CHECK-NEXT: kmovw %eax, %k1 953; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} 954; CHECK-NEXT: vmovaps %zmm2, %zmm0 955; CHECK-NEXT: retq 956 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask) 957 ret <8 x i64> %res 958} 959 960declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 961 962define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) 
{ 963; CHECK-LABEL: test_maskz_valign_d: 964; CHECK: ## BB#0: 965; CHECK-NEXT: kmovw %edi, %k1 966; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} 967; CHECK-NEXT: retq 968 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask) 969 ret <16 x i32> %res 970} 971 972declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 973 974define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 975; CHECK-LABEL: test_mask_store_ss: 976; CHECK: ## BB#0: 977; CHECK-NEXT: kmovw %esi, %k1 978; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1} 979; CHECK-NEXT: retq 980 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask) 981 ret void 982} 983 984declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) 985 986define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { 987; CHECK-LABEL: test_pcmpeq_d: 988; CHECK: ## BB#0: 989; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 990; CHECK-NEXT: kmovw %k0, %eax 991; CHECK-NEXT: retq 992 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 993 ret i16 %res 994} 995 996define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 997; CHECK-LABEL: test_mask_pcmpeq_d: 998; CHECK: ## BB#0: 999; CHECK-NEXT: kmovw %edi, %k1 1000; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1001; CHECK-NEXT: kmovw %k0, %eax 1002; CHECK-NEXT: retq 1003 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 1004 ret i16 %res 1005} 1006 1007declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) 1008 1009define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { 1010; CHECK-LABEL: test_pcmpeq_q: 1011; CHECK: ## BB#0: 1012; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 1013; CHECK-NEXT: kmovw %k0, %eax 1014; CHECK-NEXT: retq 1015 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 
1016 ret i8 %res 1017} 1018 1019define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1020; CHECK-LABEL: test_mask_pcmpeq_q: 1021; CHECK: ## BB#0: 1022; CHECK-NEXT: movzbl %dil, %eax 1023; CHECK-NEXT: kmovw %eax, %k1 1024; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 1025; CHECK-NEXT: kmovw %k0, %eax 1026; CHECK-NEXT: retq 1027 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1028 ret i8 %res 1029} 1030 1031declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) 1032 1033define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { 1034; CHECK-LABEL: test_pcmpgt_d: 1035; CHECK: ## BB#0: 1036; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 1037; CHECK-NEXT: kmovw %k0, %eax 1038; CHECK-NEXT: retq 1039 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 1040 ret i16 %res 1041} 1042 1043define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1044; CHECK-LABEL: test_mask_pcmpgt_d: 1045; CHECK: ## BB#0: 1046; CHECK-NEXT: kmovw %edi, %k1 1047; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 1048; CHECK-NEXT: kmovw %k0, %eax 1049; CHECK-NEXT: retq 1050 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 1051 ret i16 %res 1052} 1053 1054declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) 1055 1056define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { 1057; CHECK-LABEL: test_pcmpgt_q: 1058; CHECK: ## BB#0: 1059; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 1060; CHECK-NEXT: kmovw %k0, %eax 1061; CHECK-NEXT: retq 1062 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 1063 ret i8 %res 1064} 1065 1066define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1067; CHECK-LABEL: test_mask_pcmpgt_q: 1068; CHECK: ## BB#0: 1069; CHECK-NEXT: movzbl %dil, %eax 1070; CHECK-NEXT: kmovw %eax, %k1 1071; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 1072; CHECK-NEXT: kmovw %k0, 
%eax 1073; CHECK-NEXT: retq 1074 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 1075 ret i8 %res 1076} 1077 1078declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) 1079 1080define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 1081; CHECK-LABEL: test_cmp_d_512: 1082; CHECK: ## BB#0: 1083; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1084; CHECK-NEXT: kmovw %k0, %r8d 1085; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 1086; CHECK-NEXT: kmovw %k0, %r9d 1087; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 1088; CHECK-NEXT: kmovw %k0, %r10d 1089; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 1090; CHECK-NEXT: kmovw %k0, %esi 1091; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 1092; CHECK-NEXT: kmovw %k0, %edi 1093; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 1094; CHECK-NEXT: kmovw %k0, %eax 1095; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 1096; CHECK-NEXT: kmovw %k0, %ecx 1097; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 1098; CHECK-NEXT: kmovw %k0, %edx 1099; CHECK-NEXT: vmovd %r8d, %xmm0 1100; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1101; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1102; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1103; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1104; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1105; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1106; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1107; CHECK-NEXT: retq 1108 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 1109 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1110 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 1111 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1112 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 1113 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1114 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 1115 %vec3 
= insertelement <8 x i16> %vec2, i16 %res3, i32 3 1116 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 1117 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1118 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 1119 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1120 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 1121 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1122 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 1123 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1124 ret <8 x i16> %vec7 1125} 1126 1127define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1128; CHECK-LABEL: test_mask_cmp_d_512: 1129; CHECK: ## BB#0: 1130; CHECK-NEXT: kmovw %edi, %k1 1131; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1132; CHECK-NEXT: kmovw %k0, %r8d 1133; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1} 1134; CHECK-NEXT: kmovw %k0, %r9d 1135; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1} 1136; CHECK-NEXT: kmovw %k0, %r10d 1137; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} 1138; CHECK-NEXT: kmovw %k0, %esi 1139; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} 1140; CHECK-NEXT: kmovw %k0, %edi 1141; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 1142; CHECK-NEXT: kmovw %k0, %eax 1143; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1} 1144; CHECK-NEXT: kmovw %k0, %ecx 1145; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1} 1146; CHECK-NEXT: kmovw %k0, %edx 1147; CHECK-NEXT: vmovd %r8d, %xmm0 1148; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1149; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1150; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1151; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1152; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1153; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1154; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1155; 
CHECK-NEXT: retq 1156 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 1157 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1158 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 1159 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1160 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 1161 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1162 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 1163 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1164 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 1165 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1166 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 1167 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1168 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 1169 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1170 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 1171 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1172 ret <8 x i16> %vec7 1173} 1174 1175declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 1176 1177define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 1178; CHECK-LABEL: test_ucmp_d_512: 1179; CHECK: ## BB#0: 1180; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 1181; CHECK-NEXT: kmovw %k0, %r8d 1182; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 1183; CHECK-NEXT: kmovw %k0, %r9d 1184; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 1185; CHECK-NEXT: kmovw %k0, %r10d 1186; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 1187; CHECK-NEXT: kmovw %k0, %esi 1188; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 1189; CHECK-NEXT: 
kmovw %k0, %edi 1190; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 1191; CHECK-NEXT: kmovw %k0, %eax 1192; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 1193; CHECK-NEXT: kmovw %k0, %ecx 1194; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 1195; CHECK-NEXT: kmovw %k0, %edx 1196; CHECK-NEXT: vmovd %r8d, %xmm0 1197; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1198; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1199; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1200; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1201; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1202; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1203; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1204; CHECK-NEXT: retq 1205 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 1206 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1207 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 1208 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1209 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 1210 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1211 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 1212 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1213 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 1214 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1215 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 1216 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1217 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 1218 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1219 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 1220 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1221 ret <8 x i16> %vec7 1222} 
1223 1224define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1225; CHECK-LABEL: test_mask_ucmp_d_512: 1226; CHECK: ## BB#0: 1227; CHECK-NEXT: kmovw %edi, %k1 1228; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1} 1229; CHECK-NEXT: kmovw %k0, %r8d 1230; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 1231; CHECK-NEXT: kmovw %k0, %r9d 1232; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1} 1233; CHECK-NEXT: kmovw %k0, %r10d 1234; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} 1235; CHECK-NEXT: kmovw %k0, %esi 1236; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} 1237; CHECK-NEXT: kmovw %k0, %edi 1238; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} 1239; CHECK-NEXT: kmovw %k0, %eax 1240; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} 1241; CHECK-NEXT: kmovw %k0, %ecx 1242; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1} 1243; CHECK-NEXT: kmovw %k0, %edx 1244; CHECK-NEXT: vmovd %r8d, %xmm0 1245; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0 1246; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0 1247; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0 1248; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0 1249; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 1250; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1251; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 1252; CHECK-NEXT: retq 1253 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 1254 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 1255 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 1256 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 1257 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 1258 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 1259 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 1260 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 1261 %res4 = call i16 
@llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 1262 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 1263 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 1264 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 1265 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 1266 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 1267 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 1268 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 1269 ret <8 x i16> %vec7 1270} 1271 1272declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 1273 1274define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1275; CHECK-LABEL: test_cmp_q_512: 1276; CHECK: ## BB#0: 1277; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 1278; CHECK-NEXT: kmovw %k0, %r8d 1279; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 1280; CHECK-NEXT: kmovw %k0, %r9d 1281; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 1282; CHECK-NEXT: kmovw %k0, %r10d 1283; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 1284; CHECK-NEXT: kmovw %k0, %r11d 1285; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 1286; CHECK-NEXT: kmovw %k0, %edi 1287; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 1288; CHECK-NEXT: kmovw %k0, %eax 1289; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 1290; CHECK-NEXT: kmovw %k0, %ecx 1291; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 1292; CHECK-NEXT: kmovw %k0, %edx 1293; CHECK-NEXT: movzbl %r8b, %esi 1294; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1295; CHECK-NEXT: movzbl %r9b, %esi 1296; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1297; CHECK-NEXT: movzbl %r10b, %esi 1298; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1299; CHECK-NEXT: movzbl %r11b, %esi 1300; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1301; CHECK-NEXT: movzbl %dil, %esi 1302; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1303; CHECK-NEXT: 
movzbl %al, %eax 1304; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1305; CHECK-NEXT: movzbl %cl, %eax 1306; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1307; CHECK-NEXT: movzbl %dl, %eax 1308; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1309; CHECK-NEXT: retq 1310 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1311 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1312 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1313 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1314 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1315 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1316 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1317 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1318 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1319 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1320 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1321 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1322 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1323 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1324 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1325 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1326 ret <8 x i8> %vec7 1327} 1328 1329define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1330; CHECK-LABEL: test_mask_cmp_q_512: 1331; CHECK: ## BB#0: 1332; CHECK-NEXT: movzbl %dil, %eax 1333; CHECK-NEXT: kmovw %eax, %k1 1334; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 1335; CHECK-NEXT: kmovw %k0, %r8d 1336; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1} 1337; CHECK-NEXT: kmovw %k0, %r9d 1338; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1} 1339; CHECK-NEXT: 
kmovw %k0, %r10d 1340; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} 1341; CHECK-NEXT: kmovw %k0, %r11d 1342; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} 1343; CHECK-NEXT: kmovw %k0, %edi 1344; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 1345; CHECK-NEXT: kmovw %k0, %eax 1346; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} 1347; CHECK-NEXT: kmovw %k0, %ecx 1348; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1} 1349; CHECK-NEXT: kmovw %k0, %edx 1350; CHECK-NEXT: movzbl %r8b, %esi 1351; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1352; CHECK-NEXT: movzbl %r9b, %esi 1353; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1354; CHECK-NEXT: movzbl %r10b, %esi 1355; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1356; CHECK-NEXT: movzbl %r11b, %esi 1357; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1358; CHECK-NEXT: movzbl %dil, %esi 1359; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1360; CHECK-NEXT: movzbl %al, %eax 1361; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1362; CHECK-NEXT: movzbl %cl, %eax 1363; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1364; CHECK-NEXT: movzbl %dl, %eax 1365; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1366; CHECK-NEXT: retq 1367 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1368 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1369 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 1370 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1371 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 1372 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1373 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 1374 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1375 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 1376 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1377 %res5 = call i8 
@llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 1378 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1379 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 1380 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1381 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 1382 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1383 ret <8 x i8> %vec7 1384} 1385 1386declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 1387 1388define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 1389; CHECK-LABEL: test_ucmp_q_512: 1390; CHECK: ## BB#0: 1391; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 1392; CHECK-NEXT: kmovw %k0, %r8d 1393; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 1394; CHECK-NEXT: kmovw %k0, %r9d 1395; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 1396; CHECK-NEXT: kmovw %k0, %r10d 1397; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 1398; CHECK-NEXT: kmovw %k0, %r11d 1399; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 1400; CHECK-NEXT: kmovw %k0, %edi 1401; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 1402; CHECK-NEXT: kmovw %k0, %eax 1403; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 1404; CHECK-NEXT: kmovw %k0, %ecx 1405; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 1406; CHECK-NEXT: kmovw %k0, %edx 1407; CHECK-NEXT: movzbl %r8b, %esi 1408; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1409; CHECK-NEXT: movzbl %r9b, %esi 1410; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1411; CHECK-NEXT: movzbl %r10b, %esi 1412; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1413; CHECK-NEXT: movzbl %r11b, %esi 1414; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1415; CHECK-NEXT: movzbl %dil, %esi 1416; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1417; CHECK-NEXT: movzbl %al, %eax 1418; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1419; CHECK-NEXT: movzbl %cl, %eax 1420; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1421; CHECK-NEXT: movzbl %dl, %eax 
1422; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1423; CHECK-NEXT: retq 1424 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 1425 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1426 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 1427 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1428 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 1429 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1430 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 1431 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1432 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 1433 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1434 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 1435 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1436 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 1437 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1438 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 1439 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1440 ret <8 x i8> %vec7 1441} 1442 1443define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1444; CHECK-LABEL: test_mask_ucmp_q_512: 1445; CHECK: ## BB#0: 1446; CHECK-NEXT: movzbl %dil, %eax 1447; CHECK-NEXT: kmovw %eax, %k1 1448; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1} 1449; CHECK-NEXT: kmovw %k0, %r8d 1450; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 1451; CHECK-NEXT: kmovw %k0, %r9d 1452; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} 1453; CHECK-NEXT: kmovw %k0, %r10d 1454; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} 1455; CHECK-NEXT: kmovw %k0, %r11d 1456; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} 1457; 
CHECK-NEXT: kmovw %k0, %edi 1458; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} 1459; CHECK-NEXT: kmovw %k0, %eax 1460; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} 1461; CHECK-NEXT: kmovw %k0, %ecx 1462; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1} 1463; CHECK-NEXT: kmovw %k0, %edx 1464; CHECK-NEXT: movzbl %r8b, %esi 1465; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 1466; CHECK-NEXT: movzbl %r9b, %esi 1467; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 1468; CHECK-NEXT: movzbl %r10b, %esi 1469; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 1470; CHECK-NEXT: movzbl %r11b, %esi 1471; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0 1472; CHECK-NEXT: movzbl %dil, %esi 1473; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 1474; CHECK-NEXT: movzbl %al, %eax 1475; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1476; CHECK-NEXT: movzbl %cl, %eax 1477; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1478; CHECK-NEXT: movzbl %dl, %eax 1479; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1480; CHECK-NEXT: retq 1481 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 1482 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 1483 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 1484 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 1485 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 1486 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 1487 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 1488 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 1489 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 1490 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 1491 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 1492 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 1493 %res6 = call i8 
@llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 1494 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 1495 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 1496 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 1497 ret <8 x i8> %vec7 1498} 1499 1500declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 1501 1502define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { 1503; CHECK-LABEL: test_mask_vextractf32x4: 1504; CHECK: ## BB#0: 1505; CHECK-NEXT: kmovw %edi, %k1 1506; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} 1507; CHECK-NEXT: retq 1508 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) 1509 ret <4 x float> %res 1510} 1511 1512declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8) 1513 1514define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { 1515; CHECK-LABEL: test_mask_vextracti64x4: 1516; CHECK: ## BB#0: 1517; CHECK-NEXT: kmovw %edi, %k1 1518; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1} 1519; CHECK-NEXT: retq 1520 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask) 1521 ret <4 x i64> %res 1522} 1523 1524declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8) 1525 1526define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { 1527; CHECK-LABEL: test_maskz_vextracti32x4: 1528; CHECK: ## BB#0: 1529; CHECK-NEXT: kmovw %edi, %k1 1530; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} 1531; CHECK-NEXT: retq 1532 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) 1533 ret <4 x i32> %res 1534} 1535 1536declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8) 1537 
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vextractf64x4 $2, %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)

;; Immediate (shift-by-constant) forms: unmasked, merge-masked, zero-masked.
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define 
<16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) { 1636; CHECK-LABEL: test_x86_avx512_maskz_psrli_d: 1637; CHECK: ## BB#0: 1638; CHECK-NEXT: kmovw %edi, %k1 1639; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} 1640; CHECK-NEXT: retq 1641 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1642 ret <16 x i32> %res 1643} 1644 1645declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1646 1647define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) { 1648; CHECK-LABEL: test_x86_avx512_psrli_q: 1649; CHECK: ## BB#0: 1650; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 1651; CHECK-NEXT: retq 1652 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1653 ret <8 x i64> %res 1654} 1655 1656define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1657; CHECK-LABEL: test_x86_avx512_mask_psrli_q: 1658; CHECK: ## BB#0: 1659; CHECK-NEXT: movzbl %dil, %eax 1660; CHECK-NEXT: kmovw %eax, %k1 1661; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} 1662; CHECK-NEXT: vmovaps %zmm1, %zmm0 1663; CHECK-NEXT: retq 1664 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1665 ret <8 x i64> %res 1666} 1667 1668define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) { 1669; CHECK-LABEL: test_x86_avx512_maskz_psrli_q: 1670; CHECK: ## BB#0: 1671; CHECK-NEXT: movzbl %dil, %eax 1672; CHECK-NEXT: kmovw %eax, %k1 1673; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} 1674; CHECK-NEXT: retq 1675 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1676 ret <8 x i64> %res 1677} 1678 1679declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1680 1681define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) { 1682; CHECK-LABEL: 
test_x86_avx512_psrai_d: 1683; CHECK: ## BB#0: 1684; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 1685; CHECK-NEXT: retq 1686 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1687 ret <16 x i32> %res 1688} 1689 1690define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1691; CHECK-LABEL: test_x86_avx512_mask_psrai_d: 1692; CHECK: ## BB#0: 1693; CHECK-NEXT: kmovw %edi, %k1 1694; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} 1695; CHECK-NEXT: vmovaps %zmm1, %zmm0 1696; CHECK-NEXT: retq 1697 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1698 ret <16 x i32> %res 1699} 1700 1701define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) { 1702; CHECK-LABEL: test_x86_avx512_maskz_psrai_d: 1703; CHECK: ## BB#0: 1704; CHECK-NEXT: kmovw %edi, %k1 1705; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} 1706; CHECK-NEXT: retq 1707 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1708 ret <16 x i32> %res 1709} 1710 1711declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1712 1713define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) { 1714; CHECK-LABEL: test_x86_avx512_psrai_q: 1715; CHECK: ## BB#0: 1716; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 1717; CHECK-NEXT: retq 1718 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1719 ret <8 x i64> %res 1720} 1721 1722define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1723; CHECK-LABEL: test_x86_avx512_mask_psrai_q: 1724; CHECK: ## BB#0: 1725; CHECK-NEXT: movzbl %dil, %eax 1726; CHECK-NEXT: kmovw %eax, %k1 1727; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} 1728; CHECK-NEXT: vmovaps %zmm1, %zmm0 1729; CHECK-NEXT: retq 1730 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x 
i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1731 ret <8 x i64> %res 1732} 1733 1734define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) { 1735; CHECK-LABEL: test_x86_avx512_maskz_psrai_q: 1736; CHECK: ## BB#0: 1737; CHECK-NEXT: movzbl %dil, %eax 1738; CHECK-NEXT: kmovw %eax, %k1 1739; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} 1740; CHECK-NEXT: retq 1741 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1742 ret <8 x i64> %res 1743} 1744 1745declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1746 1747define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) { 1748; CHECK-LABEL: test_x86_avx512_psll_d: 1749; CHECK: ## BB#0: 1750; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 1751; CHECK-NEXT: retq 1752 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1753 ret <16 x i32> %res 1754} 1755 1756define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1757; CHECK-LABEL: test_x86_avx512_mask_psll_d: 1758; CHECK: ## BB#0: 1759; CHECK-NEXT: kmovw %edi, %k1 1760; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} 1761; CHECK-NEXT: vmovaps %zmm2, %zmm0 1762; CHECK-NEXT: retq 1763 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1764 ret <16 x i32> %res 1765} 1766 1767define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1768; CHECK-LABEL: test_x86_avx512_maskz_psll_d: 1769; CHECK: ## BB#0: 1770; CHECK-NEXT: kmovw %edi, %k1 1771; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} 1772; CHECK-NEXT: retq 1773 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1774 ret <16 x i32> %res 1775} 1776 1777declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, 
<16 x i32>, i16) nounwind readnone 1778 1779define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) { 1780; CHECK-LABEL: test_x86_avx512_psll_q: 1781; CHECK: ## BB#0: 1782; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 1783; CHECK-NEXT: retq 1784 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1785 ret <8 x i64> %res 1786} 1787 1788define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1789; CHECK-LABEL: test_x86_avx512_mask_psll_q: 1790; CHECK: ## BB#0: 1791; CHECK-NEXT: movzbl %dil, %eax 1792; CHECK-NEXT: kmovw %eax, %k1 1793; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} 1794; CHECK-NEXT: vmovaps %zmm2, %zmm0 1795; CHECK-NEXT: retq 1796 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1797 ret <8 x i64> %res 1798} 1799 1800define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1801; CHECK-LABEL: test_x86_avx512_maskz_psll_q: 1802; CHECK: ## BB#0: 1803; CHECK-NEXT: movzbl %dil, %eax 1804; CHECK-NEXT: kmovw %eax, %k1 1805; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} 1806; CHECK-NEXT: retq 1807 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1808 ret <8 x i64> %res 1809} 1810 1811declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1812 1813define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) { 1814; CHECK-LABEL: test_x86_avx512_psrl_d: 1815; CHECK: ## BB#0: 1816; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 1817; CHECK-NEXT: retq 1818 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1819 ret <16 x i32> %res 1820} 1821 1822define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1823; CHECK-LABEL: 
test_x86_avx512_mask_psrl_d: 1824; CHECK: ## BB#0: 1825; CHECK-NEXT: kmovw %edi, %k1 1826; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} 1827; CHECK-NEXT: vmovaps %zmm2, %zmm0 1828; CHECK-NEXT: retq 1829 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1830 ret <16 x i32> %res 1831} 1832 1833define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1834; CHECK-LABEL: test_x86_avx512_maskz_psrl_d: 1835; CHECK: ## BB#0: 1836; CHECK-NEXT: kmovw %edi, %k1 1837; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} 1838; CHECK-NEXT: retq 1839 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1840 ret <16 x i32> %res 1841} 1842 1843declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1844 1845define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) { 1846; CHECK-LABEL: test_x86_avx512_psrl_q: 1847; CHECK: ## BB#0: 1848; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 1849; CHECK-NEXT: retq 1850 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1851 ret <8 x i64> %res 1852} 1853 1854define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1855; CHECK-LABEL: test_x86_avx512_mask_psrl_q: 1856; CHECK: ## BB#0: 1857; CHECK-NEXT: movzbl %dil, %eax 1858; CHECK-NEXT: kmovw %eax, %k1 1859; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} 1860; CHECK-NEXT: vmovaps %zmm2, %zmm0 1861; CHECK-NEXT: retq 1862 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1863 ret <8 x i64> %res 1864} 1865 1866define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1867; CHECK-LABEL: test_x86_avx512_maskz_psrl_q: 1868; CHECK: ## BB#0: 1869; CHECK-NEXT: movzbl %dil, %eax 1870; CHECK-NEXT: 
kmovw %eax, %k1 1871; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} 1872; CHECK-NEXT: retq 1873 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1874 ret <8 x i64> %res 1875} 1876 1877declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1878 1879define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) { 1880; CHECK-LABEL: test_x86_avx512_psra_d: 1881; CHECK: ## BB#0: 1882; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 1883; CHECK-NEXT: retq 1884 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1885 ret <16 x i32> %res 1886} 1887 1888define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1889; CHECK-LABEL: test_x86_avx512_mask_psra_d: 1890; CHECK: ## BB#0: 1891; CHECK-NEXT: kmovw %edi, %k1 1892; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} 1893; CHECK-NEXT: vmovaps %zmm2, %zmm0 1894; CHECK-NEXT: retq 1895 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1896 ret <16 x i32> %res 1897} 1898 1899define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1900; CHECK-LABEL: test_x86_avx512_maskz_psra_d: 1901; CHECK: ## BB#0: 1902; CHECK-NEXT: kmovw %edi, %k1 1903; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} 1904; CHECK-NEXT: retq 1905 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1906 ret <16 x i32> %res 1907} 1908 1909declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1910 1911define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) { 1912; CHECK-LABEL: test_x86_avx512_psra_q: 1913; CHECK: ## BB#0: 1914; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 1915; CHECK-NEXT: retq 1916 %res = call 
<8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1917 ret <8 x i64> %res 1918} 1919 1920define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1921; CHECK-LABEL: test_x86_avx512_mask_psra_q: 1922; CHECK: ## BB#0: 1923; CHECK-NEXT: movzbl %dil, %eax 1924; CHECK-NEXT: kmovw %eax, %k1 1925; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} 1926; CHECK-NEXT: vmovaps %zmm2, %zmm0 1927; CHECK-NEXT: retq 1928 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1929 ret <8 x i64> %res 1930} 1931 1932define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1933; CHECK-LABEL: test_x86_avx512_maskz_psra_q: 1934; CHECK: ## BB#0: 1935; CHECK-NEXT: movzbl %dil, %eax 1936; CHECK-NEXT: kmovw %eax, %k1 1937; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} 1938; CHECK-NEXT: retq 1939 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1940 ret <8 x i64> %res 1941} 1942 1943declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1944 1945define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) { 1946; CHECK-LABEL: test_x86_avx512_psllv_d: 1947; CHECK: ## BB#0: 1948; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 1949; CHECK-NEXT: retq 1950 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1951 ret <16 x i32> %res 1952} 1953 1954define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1955; CHECK-LABEL: test_x86_avx512_mask_psllv_d: 1956; CHECK: ## BB#0: 1957; CHECK-NEXT: kmovw %edi, %k1 1958; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} 1959; CHECK-NEXT: vmovaps %zmm2, %zmm0 1960; CHECK-NEXT: retq 1961 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x 
i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1962 ret <16 x i32> %res 1963} 1964 1965define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1966; CHECK-LABEL: test_x86_avx512_maskz_psllv_d: 1967; CHECK: ## BB#0: 1968; CHECK-NEXT: kmovw %edi, %k1 1969; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1970; CHECK-NEXT: retq 1971 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1972 ret <16 x i32> %res 1973} 1974 1975declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1976 1977define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) { 1978; CHECK-LABEL: test_x86_avx512_psllv_q: 1979; CHECK: ## BB#0: 1980; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 1981; CHECK-NEXT: retq 1982 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1983 ret <8 x i64> %res 1984} 1985 1986define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1987; CHECK-LABEL: test_x86_avx512_mask_psllv_q: 1988; CHECK: ## BB#0: 1989; CHECK-NEXT: movzbl %dil, %eax 1990; CHECK-NEXT: kmovw %eax, %k1 1991; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} 1992; CHECK-NEXT: vmovaps %zmm2, %zmm0 1993; CHECK-NEXT: retq 1994 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1995 ret <8 x i64> %res 1996} 1997 1998define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1999; CHECK-LABEL: test_x86_avx512_maskz_psllv_q: 2000; CHECK: ## BB#0: 2001; CHECK-NEXT: movzbl %dil, %eax 2002; CHECK-NEXT: kmovw %eax, %k1 2003; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} 2004; CHECK-NEXT: retq 2005 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 2006 ret <8 x i64> %res 
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone


;; Variable (per-element) arithmetic right shift: unmasked, merge-masked, zero-masked.
define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x 
i64> %a1, <8 x i64> %a2, i8 %mask) { 2054; CHECK-LABEL: test_x86_avx512_mask_psrav_q: 2055; CHECK: ## BB#0: 2056; CHECK-NEXT: movzbl %dil, %eax 2057; CHECK-NEXT: kmovw %eax, %k1 2058; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} 2059; CHECK-NEXT: vmovaps %zmm2, %zmm0 2060; CHECK-NEXT: retq 2061 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 2062 ret <8 x i64> %res 2063} 2064 2065define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 2066; CHECK-LABEL: test_x86_avx512_maskz_psrav_q: 2067; CHECK: ## BB#0: 2068; CHECK-NEXT: movzbl %dil, %eax 2069; CHECK-NEXT: kmovw %eax, %k1 2070; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} 2071; CHECK-NEXT: retq 2072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 2073 ret <8 x i64> %res 2074} 2075 2076declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 2077 2078define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) { 2079; CHECK-LABEL: test_x86_avx512_psrlv_d: 2080; CHECK: ## BB#0: 2081; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 2082; CHECK-NEXT: retq 2083 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 2084 ret <16 x i32> %res 2085} 2086 2087define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 2088; CHECK-LABEL: test_x86_avx512_mask_psrlv_d: 2089; CHECK: ## BB#0: 2090; CHECK-NEXT: kmovw %edi, %k1 2091; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} 2092; CHECK-NEXT: vmovaps %zmm2, %zmm0 2093; CHECK-NEXT: retq 2094 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 2095 ret <16 x i32> %res 2096} 2097 2098define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 2099; 
CHECK-LABEL: test_x86_avx512_maskz_psrlv_d: 2100; CHECK: ## BB#0: 2101; CHECK-NEXT: kmovw %edi, %k1 2102; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} 2103; CHECK-NEXT: retq 2104 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 2105 ret <16 x i32> %res 2106} 2107 2108declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 2109 2110define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) { 2111; CHECK-LABEL: test_x86_avx512_psrlv_q: 2112; CHECK: ## BB#0: 2113; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 2114; CHECK-NEXT: retq 2115 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 2116 ret <8 x i64> %res 2117} 2118 2119define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 2120; CHECK-LABEL: test_x86_avx512_mask_psrlv_q: 2121; CHECK: ## BB#0: 2122; CHECK-NEXT: movzbl %dil, %eax 2123; CHECK-NEXT: kmovw %eax, %k1 2124; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} 2125; CHECK-NEXT: vmovaps %zmm2, %zmm0 2126; CHECK-NEXT: retq 2127 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 2128 ret <8 x i64> %res 2129} 2130 2131define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 2132; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q: 2133; CHECK: ## BB#0: 2134; CHECK-NEXT: movzbl %dil, %eax 2135; CHECK-NEXT: kmovw %eax, %k1 2136; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} 2137; CHECK-NEXT: retq 2138 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 2139 ret <8 x i64> %res 2140} 2141 2142declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 2143 2144define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x 
i64>* %ptr) { 2145; CHECK-LABEL: test_x86_avx512_psrlv_q_memop: 2146; CHECK: ## BB#0: 2147; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0 2148; CHECK-NEXT: retq 2149 %b = load <8 x i64>, <8 x i64>* %ptr 2150 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2151 ret <8 x i64> %res 2152} 2153 2154declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 2155declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 2156declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 2157 2158define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) { 2159; CHECK-LABEL: test_vsubps_rn: 2160; CHECK: ## BB#0: 2161; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 2162; CHECK-NEXT: retq 2163 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 2164 <16 x float> zeroinitializer, i16 -1, i32 0) 2165 ret <16 x float> %res 2166} 2167 2168define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) { 2169; CHECK-LABEL: test_vsubps_rd: 2170; CHECK: ## BB#0: 2171; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 2172; CHECK-NEXT: retq 2173 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 2174 <16 x float> zeroinitializer, i16 -1, i32 1) 2175 ret <16 x float> %res 2176} 2177 2178define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) { 2179; CHECK-LABEL: test_vsubps_ru: 2180; CHECK: ## BB#0: 2181; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 2182; CHECK-NEXT: retq 2183 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 2184 <16 x float> zeroinitializer, i16 -1, i32 2) 2185 ret <16 x float> %res 2186} 2187 2188define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) { 2189; CHECK-LABEL: test_vsubps_rz: 
2190; CHECK: ## BB#0: 2191; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 2192; CHECK-NEXT: retq 2193 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 2194 <16 x float> zeroinitializer, i16 -1, i32 3) 2195 ret <16 x float> %res 2196} 2197 2198define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) { 2199; CHECK-LABEL: test_vmulps_rn: 2200; CHECK: ## BB#0: 2201; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 2202; CHECK-NEXT: retq 2203 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2204 <16 x float> zeroinitializer, i16 -1, i32 0) 2205 ret <16 x float> %res 2206} 2207 2208define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) { 2209; CHECK-LABEL: test_vmulps_rd: 2210; CHECK: ## BB#0: 2211; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 2212; CHECK-NEXT: retq 2213 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2214 <16 x float> zeroinitializer, i16 -1, i32 1) 2215 ret <16 x float> %res 2216} 2217 2218define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) { 2219; CHECK-LABEL: test_vmulps_ru: 2220; CHECK: ## BB#0: 2221; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 2222; CHECK-NEXT: retq 2223 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2224 <16 x float> zeroinitializer, i16 -1, i32 2) 2225 ret <16 x float> %res 2226} 2227 2228define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) { 2229; CHECK-LABEL: test_vmulps_rz: 2230; CHECK: ## BB#0: 2231; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 2232; CHECK-NEXT: retq 2233 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2234 <16 x float> zeroinitializer, i16 -1, i32 3) 2235 ret <16 x float> %res 2236} 2237 2238;; mask float 2239define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 
2240; CHECK-LABEL: test_vmulps_mask_rn: 2241; CHECK: ## BB#0: 2242; CHECK-NEXT: kmovw %edi, %k1 2243; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2244; CHECK-NEXT: retq 2245 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2246 <16 x float> zeroinitializer, i16 %mask, i32 0) 2247 ret <16 x float> %res 2248} 2249 2250define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2251; CHECK-LABEL: test_vmulps_mask_rd: 2252; CHECK: ## BB#0: 2253; CHECK-NEXT: kmovw %edi, %k1 2254; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2255; CHECK-NEXT: retq 2256 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2257 <16 x float> zeroinitializer, i16 %mask, i32 1) 2258 ret <16 x float> %res 2259} 2260 2261define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2262; CHECK-LABEL: test_vmulps_mask_ru: 2263; CHECK: ## BB#0: 2264; CHECK-NEXT: kmovw %edi, %k1 2265; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2266; CHECK-NEXT: retq 2267 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2268 <16 x float> zeroinitializer, i16 %mask, i32 2) 2269 ret <16 x float> %res 2270} 2271 2272define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 2273; CHECK-LABEL: test_vmulps_mask_rz: 2274; CHECK: ## BB#0: 2275; CHECK-NEXT: kmovw %edi, %k1 2276; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2277; CHECK-NEXT: retq 2278 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2279 <16 x float> zeroinitializer, i16 %mask, i32 3) 2280 ret <16 x float> %res 2281} 2282 2283;; With Passthru value 2284define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 2285; CHECK-LABEL: test_vmulps_mask_passthru_rn: 2286; CHECK: ## 
BB#0: 2287; CHECK-NEXT: kmovw %edi, %k1 2288; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2289; CHECK-NEXT: vmovaps %zmm2, %zmm0 2290; CHECK-NEXT: retq 2291 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2292 <16 x float> %passthru, i16 %mask, i32 0) 2293 ret <16 x float> %res 2294} 2295 2296define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 2297; CHECK-LABEL: test_vmulps_mask_passthru_rd: 2298; CHECK: ## BB#0: 2299; CHECK-NEXT: kmovw %edi, %k1 2300; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2301; CHECK-NEXT: vmovaps %zmm2, %zmm0 2302; CHECK-NEXT: retq 2303 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2304 <16 x float> %passthru, i16 %mask, i32 1) 2305 ret <16 x float> %res 2306} 2307 2308define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 2309; CHECK-LABEL: test_vmulps_mask_passthru_ru: 2310; CHECK: ## BB#0: 2311; CHECK-NEXT: kmovw %edi, %k1 2312; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2313; CHECK-NEXT: vmovaps %zmm2, %zmm0 2314; CHECK-NEXT: retq 2315 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2316 <16 x float> %passthru, i16 %mask, i32 2) 2317 ret <16 x float> %res 2318} 2319 2320define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 2321; CHECK-LABEL: test_vmulps_mask_passthru_rz: 2322; CHECK: ## BB#0: 2323; CHECK-NEXT: kmovw %edi, %k1 2324; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 2325; CHECK-NEXT: vmovaps %zmm2, %zmm0 2326; CHECK-NEXT: retq 2327 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 2328 <16 x float> %passthru, i16 %mask, i32 3) 2329 ret <16 x float> %res 2330} 2331 2332;; mask double 2333define <8 x 
double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 2334; CHECK-LABEL: test_vmulpd_mask_rn: 2335; CHECK: ## BB#0: 2336; CHECK-NEXT: movzbl %dil, %eax 2337; CHECK-NEXT: kmovw %eax, %k1 2338; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2339; CHECK-NEXT: retq 2340 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 2341 <8 x double> zeroinitializer, i8 %mask, i32 0) 2342 ret <8 x double> %res 2343} 2344 2345define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 2346; CHECK-LABEL: test_vmulpd_mask_rd: 2347; CHECK: ## BB#0: 2348; CHECK-NEXT: movzbl %dil, %eax 2349; CHECK-NEXT: kmovw %eax, %k1 2350; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2351; CHECK-NEXT: retq 2352 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 2353 <8 x double> zeroinitializer, i8 %mask, i32 1) 2354 ret <8 x double> %res 2355} 2356 2357define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 2358; CHECK-LABEL: test_vmulpd_mask_ru: 2359; CHECK: ## BB#0: 2360; CHECK-NEXT: movzbl %dil, %eax 2361; CHECK-NEXT: kmovw %eax, %k1 2362; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2363; CHECK-NEXT: retq 2364 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 2365 <8 x double> zeroinitializer, i8 %mask, i32 2) 2366 ret <8 x double> %res 2367} 2368 2369define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 2370; CHECK-LABEL: test_vmulpd_mask_rz: 2371; CHECK: ## BB#0: 2372; CHECK-NEXT: movzbl %dil, %eax 2373; CHECK-NEXT: kmovw %eax, %k1 2374; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} 2375; CHECK-NEXT: retq 2376 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 2377 <8 x double> zeroinitializer, i8 %mask, i32 3) 2378 ret <8 x double> %res 2379} 2380 
2381define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) { 2382; CHECK-LABEL: test_xor_epi32: 2383; CHECK: ## BB#0: 2384; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 2385; CHECK-NEXT: retq 2386 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 2387 ret < 16 x i32> %res 2388} 2389 2390define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2391; CHECK-LABEL: test_mask_xor_epi32: 2392; CHECK: ## BB#0: 2393; CHECK-NEXT: kmovw %edi, %k1 2394; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} 2395; CHECK-NEXT: vmovaps %zmm2, %zmm0 2396; CHECK-NEXT: retq 2397 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2398 ret < 16 x i32> %res 2399} 2400 2401declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2402 2403define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) { 2404; CHECK-LABEL: test_or_epi32: 2405; CHECK: ## BB#0: 2406; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 2407; CHECK-NEXT: retq 2408 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 2409 ret < 16 x i32> %res 2410} 2411 2412define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2413; CHECK-LABEL: test_mask_or_epi32: 2414; CHECK: ## BB#0: 2415; CHECK-NEXT: kmovw %edi, %k1 2416; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} 2417; CHECK-NEXT: vmovaps %zmm2, %zmm0 2418; CHECK-NEXT: retq 2419 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2420 ret < 16 x i32> %res 2421} 2422 2423declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2424 2425define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) { 2426; CHECK-LABEL: test_and_epi32: 2427; CHECK: ## BB#0: 2428; 
CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 2429; CHECK-NEXT: retq 2430 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 2431 ret < 16 x i32> %res 2432} 2433 2434define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2435; CHECK-LABEL: test_mask_and_epi32: 2436; CHECK: ## BB#0: 2437; CHECK-NEXT: kmovw %edi, %k1 2438; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} 2439; CHECK-NEXT: vmovaps %zmm2, %zmm0 2440; CHECK-NEXT: retq 2441 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2442 ret < 16 x i32> %res 2443} 2444 2445declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2446 2447define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) { 2448; CHECK-LABEL: test_xor_epi64: 2449; CHECK: ## BB#0: 2450; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 2451; CHECK-NEXT: retq 2452 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2453 ret < 8 x i64> %res 2454} 2455 2456define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2457; CHECK-LABEL: test_mask_xor_epi64: 2458; CHECK: ## BB#0: 2459; CHECK-NEXT: movzbl %dil, %eax 2460; CHECK-NEXT: kmovw %eax, %k1 2461; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} 2462; CHECK-NEXT: vmovaps %zmm2, %zmm0 2463; CHECK-NEXT: retq 2464 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2465 ret < 8 x i64> %res 2466} 2467 2468declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2469 2470define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) { 2471; CHECK-LABEL: test_or_epi64: 2472; CHECK: ## BB#0: 2473; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 2474; CHECK-NEXT: retq 2475 %res = call <8 x i64> 
@llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2476 ret < 8 x i64> %res 2477} 2478 2479define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2480; CHECK-LABEL: test_mask_or_epi64: 2481; CHECK: ## BB#0: 2482; CHECK-NEXT: movzbl %dil, %eax 2483; CHECK-NEXT: kmovw %eax, %k1 2484; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} 2485; CHECK-NEXT: vmovaps %zmm2, %zmm0 2486; CHECK-NEXT: retq 2487 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2488 ret < 8 x i64> %res 2489} 2490 2491declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2492 2493define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) { 2494; CHECK-LABEL: test_and_epi64: 2495; CHECK: ## BB#0: 2496; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 2497; CHECK-NEXT: retq 2498 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 2499 ret < 8 x i64> %res 2500} 2501 2502define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2503; CHECK-LABEL: test_mask_and_epi64: 2504; CHECK: ## BB#0: 2505; CHECK-NEXT: movzbl %dil, %eax 2506; CHECK-NEXT: kmovw %eax, %k1 2507; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} 2508; CHECK-NEXT: vmovaps %zmm2, %zmm0 2509; CHECK-NEXT: retq 2510 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2511 ret < 8 x i64> %res 2512} 2513 2514declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2515 2516 2517define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2518; CHECK-LABEL: test_mask_add_epi32_rr: 2519; CHECK: ## BB#0: 2520; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 2521; CHECK-NEXT: retq 2522 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> 
zeroinitializer, i16 -1) 2523 ret < 16 x i32> %res 2524} 2525 2526define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2527; CHECK-LABEL: test_mask_add_epi32_rrk: 2528; CHECK: ## BB#0: 2529; CHECK-NEXT: kmovw %edi, %k1 2530; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} 2531; CHECK-NEXT: vmovaps %zmm2, %zmm0 2532; CHECK-NEXT: retq 2533 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2534 ret < 16 x i32> %res 2535} 2536 2537define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2538; CHECK-LABEL: test_mask_add_epi32_rrkz: 2539; CHECK: ## BB#0: 2540; CHECK-NEXT: kmovw %edi, %k1 2541; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} 2542; CHECK-NEXT: retq 2543 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2544 ret < 16 x i32> %res 2545} 2546 2547define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2548; CHECK-LABEL: test_mask_add_epi32_rm: 2549; CHECK: ## BB#0: 2550; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 2551; CHECK-NEXT: retq 2552 %b = load <16 x i32>, <16 x i32>* %ptr_b 2553 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2554 ret < 16 x i32> %res 2555} 2556 2557define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2558; CHECK-LABEL: test_mask_add_epi32_rmk: 2559; CHECK: ## BB#0: 2560; CHECK-NEXT: kmovw %esi, %k1 2561; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} 2562; CHECK-NEXT: vmovaps %zmm1, %zmm0 2563; CHECK-NEXT: retq 2564 %b = load <16 x i32>, <16 x i32>* %ptr_b 2565 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2566 ret < 16 x i32> %res 2567} 2568 2569define <16 x i32> 
@test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2570; CHECK-LABEL: test_mask_add_epi32_rmkz: 2571; CHECK: ## BB#0: 2572; CHECK-NEXT: kmovw %esi, %k1 2573; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} 2574; CHECK-NEXT: retq 2575 %b = load <16 x i32>, <16 x i32>* %ptr_b 2576 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2577 ret < 16 x i32> %res 2578} 2579 2580define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 2581; CHECK-LABEL: test_mask_add_epi32_rmb: 2582; CHECK: ## BB#0: 2583; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 2584; CHECK-NEXT: retq 2585 %q = load i32, i32* %ptr_b 2586 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2587 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2588 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2589 ret < 16 x i32> %res 2590} 2591 2592define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2593; CHECK-LABEL: test_mask_add_epi32_rmbk: 2594; CHECK: ## BB#0: 2595; CHECK-NEXT: kmovw %esi, %k1 2596; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} 2597; CHECK-NEXT: vmovaps %zmm1, %zmm0 2598; CHECK-NEXT: retq 2599 %q = load i32, i32* %ptr_b 2600 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2601 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2602 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2603 ret < 16 x i32> %res 2604} 2605 2606define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 2607; CHECK-LABEL: test_mask_add_epi32_rmbkz: 2608; CHECK: ## BB#0: 2609; CHECK-NEXT: kmovw %esi, %k1 2610; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 2611; CHECK-NEXT: retq 
2612 %q = load i32, i32* %ptr_b 2613 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2614 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2615 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2616 ret < 16 x i32> %res 2617} 2618 2619declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2620 2621define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2622; CHECK-LABEL: test_mask_sub_epi32_rr: 2623; CHECK: ## BB#0: 2624; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 2625; CHECK-NEXT: retq 2626 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2627 ret < 16 x i32> %res 2628} 2629 2630define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 2631; CHECK-LABEL: test_mask_sub_epi32_rrk: 2632; CHECK: ## BB#0: 2633; CHECK-NEXT: kmovw %edi, %k1 2634; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} 2635; CHECK-NEXT: vmovaps %zmm2, %zmm0 2636; CHECK-NEXT: retq 2637 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2638 ret < 16 x i32> %res 2639} 2640 2641define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 2642; CHECK-LABEL: test_mask_sub_epi32_rrkz: 2643; CHECK: ## BB#0: 2644; CHECK-NEXT: kmovw %edi, %k1 2645; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} 2646; CHECK-NEXT: retq 2647 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2648 ret < 16 x i32> %res 2649} 2650 2651define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2652; CHECK-LABEL: test_mask_sub_epi32_rm: 2653; CHECK: ## BB#0: 2654; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 2655; CHECK-NEXT: retq 2656 %b = load <16 x i32>, 
<16 x i32>* %ptr_b 2657 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2658 ret < 16 x i32> %res 2659} 2660 2661define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2662; CHECK-LABEL: test_mask_sub_epi32_rmk: 2663; CHECK: ## BB#0: 2664; CHECK-NEXT: kmovw %esi, %k1 2665; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1} 2666; CHECK-NEXT: vmovaps %zmm1, %zmm0 2667; CHECK-NEXT: retq 2668 %b = load <16 x i32>, <16 x i32>* %ptr_b 2669 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2670 ret < 16 x i32> %res 2671} 2672 2673define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 2674; CHECK-LABEL: test_mask_sub_epi32_rmkz: 2675; CHECK: ## BB#0: 2676; CHECK-NEXT: kmovw %esi, %k1 2677; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} 2678; CHECK-NEXT: retq 2679 %b = load <16 x i32>, <16 x i32>* %ptr_b 2680 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2681 ret < 16 x i32> %res 2682} 2683 2684define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 2685; CHECK-LABEL: test_mask_sub_epi32_rmb: 2686; CHECK: ## BB#0: 2687; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 2688; CHECK-NEXT: retq 2689 %q = load i32, i32* %ptr_b 2690 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2691 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2692 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 2693 ret < 16 x i32> %res 2694} 2695 2696define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 2697; CHECK-LABEL: test_mask_sub_epi32_rmbk: 2698; CHECK: ## BB#0: 2699; CHECK-NEXT: kmovw %esi, %k1 2700; 
CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} 2701; CHECK-NEXT: vmovaps %zmm1, %zmm0 2702; CHECK-NEXT: retq 2703 %q = load i32, i32* %ptr_b 2704 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2705 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2706 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 2707 ret < 16 x i32> %res 2708} 2709 2710define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 2711; CHECK-LABEL: test_mask_sub_epi32_rmbkz: 2712; CHECK: ## BB#0: 2713; CHECK-NEXT: kmovw %esi, %k1 2714; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 2715; CHECK-NEXT: retq 2716 %q = load i32, i32* %ptr_b 2717 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 2718 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 2719 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 2720 ret < 16 x i32> %res 2721} 2722 2723declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 2724 2725define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 2726; CHECK-LABEL: test_mask_add_epi64_rr: 2727; CHECK: ## BB#0: 2728; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 2729; CHECK-NEXT: retq 2730 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2731 ret < 8 x i64> %res 2732} 2733 2734define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2735; CHECK-LABEL: test_mask_add_epi64_rrk: 2736; CHECK: ## BB#0: 2737; CHECK-NEXT: movzbl %dil, %eax 2738; CHECK-NEXT: kmovw %eax, %k1 2739; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} 2740; CHECK-NEXT: vmovaps %zmm2, %zmm0 2741; CHECK-NEXT: retq 2742 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x 
i64> %b, <8 x i64> %passThru, i8 %mask) 2743 ret < 8 x i64> %res 2744} 2745 2746define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 2747; CHECK-LABEL: test_mask_add_epi64_rrkz: 2748; CHECK: ## BB#0: 2749; CHECK-NEXT: movzbl %dil, %eax 2750; CHECK-NEXT: kmovw %eax, %k1 2751; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} 2752; CHECK-NEXT: retq 2753 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2754 ret < 8 x i64> %res 2755} 2756 2757define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 2758; CHECK-LABEL: test_mask_add_epi64_rm: 2759; CHECK: ## BB#0: 2760; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 2761; CHECK-NEXT: retq 2762 %b = load <8 x i64>, <8 x i64>* %ptr_b 2763 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2764 ret < 8 x i64> %res 2765} 2766 2767define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2768; CHECK-LABEL: test_mask_add_epi64_rmk: 2769; CHECK: ## BB#0: 2770; CHECK-NEXT: movzbl %sil, %eax 2771; CHECK-NEXT: kmovw %eax, %k1 2772; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1} 2773; CHECK-NEXT: vmovaps %zmm1, %zmm0 2774; CHECK-NEXT: retq 2775 %b = load <8 x i64>, <8 x i64>* %ptr_b 2776 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2777 ret < 8 x i64> %res 2778} 2779 2780define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 2781; CHECK-LABEL: test_mask_add_epi64_rmkz: 2782; CHECK: ## BB#0: 2783; CHECK-NEXT: movzbl %sil, %eax 2784; CHECK-NEXT: kmovw %eax, %k1 2785; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} 2786; CHECK-NEXT: retq 2787 %b = load <8 x i64>, <8 x i64>* %ptr_b 2788 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2789 
ret < 8 x i64> %res 2790} 2791 2792define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 2793; CHECK-LABEL: test_mask_add_epi64_rmb: 2794; CHECK: ## BB#0: 2795; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 2796; CHECK-NEXT: retq 2797 %q = load i64, i64* %ptr_b 2798 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2799 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2800 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2801 ret < 8 x i64> %res 2802} 2803 2804define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2805; CHECK-LABEL: test_mask_add_epi64_rmbk: 2806; CHECK: ## BB#0: 2807; CHECK-NEXT: movzbl %sil, %eax 2808; CHECK-NEXT: kmovw %eax, %k1 2809; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2810; CHECK-NEXT: vmovaps %zmm1, %zmm0 2811; CHECK-NEXT: retq 2812 %q = load i64, i64* %ptr_b 2813 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2814 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2815 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2816 ret < 8 x i64> %res 2817} 2818 2819define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2820; CHECK-LABEL: test_mask_add_epi64_rmbkz: 2821; CHECK: ## BB#0: 2822; CHECK-NEXT: movzbl %sil, %eax 2823; CHECK-NEXT: kmovw %eax, %k1 2824; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2825; CHECK-NEXT: retq 2826 %q = load i64, i64* %ptr_b 2827 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2828 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2829 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2830 ret < 8 x i64> %res 2831} 2832 2833declare <8 x i64> 
@llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2834 2835define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 2836; CHECK-LABEL: test_mask_sub_epi64_rr: 2837; CHECK: ## BB#0: 2838; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 2839; CHECK-NEXT: retq 2840 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2841 ret < 8 x i64> %res 2842} 2843 2844define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 2845; CHECK-LABEL: test_mask_sub_epi64_rrk: 2846; CHECK: ## BB#0: 2847; CHECK-NEXT: movzbl %dil, %eax 2848; CHECK-NEXT: kmovw %eax, %k1 2849; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} 2850; CHECK-NEXT: vmovaps %zmm2, %zmm0 2851; CHECK-NEXT: retq 2852 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2853 ret < 8 x i64> %res 2854} 2855 2856define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 2857; CHECK-LABEL: test_mask_sub_epi64_rrkz: 2858; CHECK: ## BB#0: 2859; CHECK-NEXT: movzbl %dil, %eax 2860; CHECK-NEXT: kmovw %eax, %k1 2861; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} 2862; CHECK-NEXT: retq 2863 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2864 ret < 8 x i64> %res 2865} 2866 2867define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 2868; CHECK-LABEL: test_mask_sub_epi64_rm: 2869; CHECK: ## BB#0: 2870; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 2871; CHECK-NEXT: retq 2872 %b = load <8 x i64>, <8 x i64>* %ptr_b 2873 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2874 ret < 8 x i64> %res 2875} 2876 2877define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2878; CHECK-LABEL: test_mask_sub_epi64_rmk: 2879; CHECK: 
## BB#0: 2880; CHECK-NEXT: movzbl %sil, %eax 2881; CHECK-NEXT: kmovw %eax, %k1 2882; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1} 2883; CHECK-NEXT: vmovaps %zmm1, %zmm0 2884; CHECK-NEXT: retq 2885 %b = load <8 x i64>, <8 x i64>* %ptr_b 2886 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2887 ret < 8 x i64> %res 2888} 2889 2890define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 2891; CHECK-LABEL: test_mask_sub_epi64_rmkz: 2892; CHECK: ## BB#0: 2893; CHECK-NEXT: movzbl %sil, %eax 2894; CHECK-NEXT: kmovw %eax, %k1 2895; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} 2896; CHECK-NEXT: retq 2897 %b = load <8 x i64>, <8 x i64>* %ptr_b 2898 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2899 ret < 8 x i64> %res 2900} 2901 2902define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 2903; CHECK-LABEL: test_mask_sub_epi64_rmb: 2904; CHECK: ## BB#0: 2905; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 2906; CHECK-NEXT: retq 2907 %q = load i64, i64* %ptr_b 2908 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2909 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2910 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 2911 ret < 8 x i64> %res 2912} 2913 2914define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2915; CHECK-LABEL: test_mask_sub_epi64_rmbk: 2916; CHECK: ## BB#0: 2917; CHECK-NEXT: movzbl %sil, %eax 2918; CHECK-NEXT: kmovw %eax, %k1 2919; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} 2920; CHECK-NEXT: vmovaps %zmm1, %zmm0 2921; CHECK-NEXT: retq 2922 %q = load i64, i64* %ptr_b 2923 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2924 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> 
zeroinitializer 2925 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 2926 ret < 8 x i64> %res 2927} 2928 2929define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 2930; CHECK-LABEL: test_mask_sub_epi64_rmbkz: 2931; CHECK: ## BB#0: 2932; CHECK-NEXT: movzbl %sil, %eax 2933; CHECK-NEXT: kmovw %eax, %k1 2934; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} 2935; CHECK-NEXT: retq 2936 %q = load i64, i64* %ptr_b 2937 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2938 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2939 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2940 ret < 8 x i64> %res 2941} 2942 2943declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2944 2945define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2946; CHECK-LABEL: test_mask_mul_epi32_rr: 2947; CHECK: ## BB#0: 2948; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 2949; CHECK-NEXT: retq 2950 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2951 ret < 8 x i64> %res 2952} 2953 2954define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 2955; CHECK-LABEL: test_mask_mul_epi32_rrk: 2956; CHECK: ## BB#0: 2957; CHECK-NEXT: movzbl %dil, %eax 2958; CHECK-NEXT: kmovw %eax, %k1 2959; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} 2960; CHECK-NEXT: vmovaps %zmm2, %zmm0 2961; CHECK-NEXT: retq 2962 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2963 ret < 8 x i64> %res 2964} 2965 2966define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 2967; CHECK-LABEL: test_mask_mul_epi32_rrkz: 2968; CHECK: ## BB#0: 2969; CHECK-NEXT: movzbl %dil, 
%eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rm:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmk:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmkz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epi32_rmb:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbk:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrk:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rrkz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rm:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmk:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmkz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mul_epu32_rmb:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmbk:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret < 8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mul_epu32_rmbkz:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret < 8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}


define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rn:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rd:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_ru:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rz:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
  ret <4 x float> %res
3838} 3839 3840define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3841; CHECK-LABEL: test_mask_add_ss_current: 3842; CHECK: ## BB#0: 3843; CHECK-NEXT: andl $1, %edi 3844; CHECK-NEXT: kmovw %edi, %k1 3845; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} 3846; CHECK-NEXT: vmovaps %zmm2, %zmm0 3847; CHECK-NEXT: retq 3848 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 3849 ret <4 x float> %res 3850} 3851 3852define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 3853; CHECK-LABEL: test_maskz_add_ss_rn: 3854; CHECK: ## BB#0: 3855; CHECK-NEXT: andl $1, %edi 3856; CHECK-NEXT: kmovw %edi, %k1 3857; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3858; CHECK-NEXT: retq 3859 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0) 3860 ret <4 x float> %res 3861} 3862 3863define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) { 3864; CHECK-LABEL: test_add_ss_rn: 3865; CHECK: ## BB#0: 3866; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 3867; CHECK-NEXT: retq 3868 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0) 3869 ret <4 x float> %res 3870} 3871 3872declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 3873 3874define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3875; CHECK-LABEL: test_mask_add_sd_rn: 3876; CHECK: ## BB#0: 3877; CHECK-NEXT: andl $1, %edi 3878; CHECK-NEXT: kmovw %edi, %k1 3879; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3880; CHECK-NEXT: vmovaps %zmm2, %zmm0 3881; CHECK-NEXT: retq 3882 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x 
double> %a1, <2 x double> %a2, i8 %mask, i32 0) 3883 ret <2 x double> %res 3884} 3885 3886define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3887; CHECK-LABEL: test_mask_add_sd_rd: 3888; CHECK: ## BB#0: 3889; CHECK-NEXT: andl $1, %edi 3890; CHECK-NEXT: kmovw %edi, %k1 3891; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3892; CHECK-NEXT: vmovaps %zmm2, %zmm0 3893; CHECK-NEXT: retq 3894 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1) 3895 ret <2 x double> %res 3896} 3897 3898define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3899; CHECK-LABEL: test_mask_add_sd_ru: 3900; CHECK: ## BB#0: 3901; CHECK-NEXT: andl $1, %edi 3902; CHECK-NEXT: kmovw %edi, %k1 3903; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3904; CHECK-NEXT: vmovaps %zmm2, %zmm0 3905; CHECK-NEXT: retq 3906 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2) 3907 ret <2 x double> %res 3908} 3909 3910define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3911; CHECK-LABEL: test_mask_add_sd_rz: 3912; CHECK: ## BB#0: 3913; CHECK-NEXT: andl $1, %edi 3914; CHECK-NEXT: kmovw %edi, %k1 3915; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} 3916; CHECK-NEXT: vmovaps %zmm2, %zmm0 3917; CHECK-NEXT: retq 3918 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3) 3919 ret <2 x double> %res 3920} 3921 3922define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 3923; CHECK-LABEL: test_mask_add_sd_current: 3924; CHECK: ## BB#0: 3925; CHECK-NEXT: andl $1, %edi 3926; CHECK-NEXT: kmovw %edi, %k1 3927; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1} 3928; 
CHECK-NEXT: vmovaps %zmm2, %zmm0 3929; CHECK-NEXT: retq 3930 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 3931 ret <2 x double> %res 3932} 3933 3934define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 3935; CHECK-LABEL: test_maskz_add_sd_rn: 3936; CHECK: ## BB#0: 3937; CHECK-NEXT: andl $1, %edi 3938; CHECK-NEXT: kmovw %edi, %k1 3939; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3940; CHECK-NEXT: retq 3941 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0) 3942 ret <2 x double> %res 3943} 3944 3945define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) { 3946; CHECK-LABEL: test_add_sd_rn: 3947; CHECK: ## BB#0: 3948; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 3949; CHECK-NEXT: retq 3950 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0) 3951 ret <2 x double> %res 3952} 3953 3954declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 3955 3956define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3957; CHECK-LABEL: test_mask_max_ss_sae: 3958; CHECK: ## BB#0: 3959; CHECK-NEXT: andl $1, %edi 3960; CHECK-NEXT: kmovw %edi, %k1 3961; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 3962; CHECK-NEXT: vmovaps %zmm2, %zmm0 3963; CHECK-NEXT: retq 3964 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 3965 ret <4 x float> %res 3966} 3967 3968define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 3969; CHECK-LABEL: test_maskz_max_ss_sae: 3970; CHECK: ## BB#0: 3971; CHECK-NEXT: andl $1, %edi 3972; CHECK-NEXT: kmovw %edi, %k1 
3973; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 3974; CHECK-NEXT: retq 3975 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 3976 ret <4 x float> %res 3977} 3978 3979define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) { 3980; CHECK-LABEL: test_max_ss_sae: 3981; CHECK: ## BB#0: 3982; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 3983; CHECK-NEXT: retq 3984 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 3985 ret <4 x float> %res 3986} 3987 3988define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 3989; CHECK-LABEL: test_mask_max_ss: 3990; CHECK: ## BB#0: 3991; CHECK-NEXT: andl $1, %edi 3992; CHECK-NEXT: kmovw %edi, %k1 3993; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1} 3994; CHECK-NEXT: vmovaps %zmm2, %zmm0 3995; CHECK-NEXT: retq 3996 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 3997 ret <4 x float> %res 3998} 3999 4000define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 4001; CHECK-LABEL: test_maskz_max_ss: 4002; CHECK: ## BB#0: 4003; CHECK-NEXT: andl $1, %edi 4004; CHECK-NEXT: kmovw %edi, %k1 4005; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z} 4006; CHECK-NEXT: retq 4007 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4) 4008 ret <4 x float> %res 4009} 4010 4011define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) { 4012; CHECK-LABEL: test_max_ss: 4013; CHECK: ## BB#0: 4014; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 4015; CHECK-NEXT: retq 4016 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4) 4017 ret <4 x float> %res 4018} 
4019declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 4020 4021define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 4022; CHECK-LABEL: test_mask_max_sd_sae: 4023; CHECK: ## BB#0: 4024; CHECK-NEXT: andl $1, %edi 4025; CHECK-NEXT: kmovw %edi, %k1 4026; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 4027; CHECK-NEXT: vmovaps %zmm2, %zmm0 4028; CHECK-NEXT: retq 4029 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 4030 ret <2 x double> %res 4031} 4032 4033define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 4034; CHECK-LABEL: test_maskz_max_sd_sae: 4035; CHECK: ## BB#0: 4036; CHECK-NEXT: andl $1, %edi 4037; CHECK-NEXT: kmovw %edi, %k1 4038; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 4039; CHECK-NEXT: retq 4040 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 4041 ret <2 x double> %res 4042} 4043 4044define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) { 4045; CHECK-LABEL: test_max_sd_sae: 4046; CHECK: ## BB#0: 4047; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 4048; CHECK-NEXT: retq 4049 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8) 4050 ret <2 x double> %res 4051} 4052 4053define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 4054; CHECK-LABEL: test_mask_max_sd: 4055; CHECK: ## BB#0: 4056; CHECK-NEXT: andl $1, %edi 4057; CHECK-NEXT: kmovw %edi, %k1 4058; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1} 4059; CHECK-NEXT: vmovaps %zmm2, %zmm0 4060; CHECK-NEXT: retq 4061 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> 
%a1, <2 x double> %a2, i8 %mask, i32 4) 4062 ret <2 x double> %res 4063} 4064 4065define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 4066; CHECK-LABEL: test_maskz_max_sd: 4067; CHECK: ## BB#0: 4068; CHECK-NEXT: andl $1, %edi 4069; CHECK-NEXT: kmovw %edi, %k1 4070; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} 4071; CHECK-NEXT: retq 4072 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4) 4073 ret <2 x double> %res 4074} 4075 4076define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) { 4077; CHECK-LABEL: test_max_sd: 4078; CHECK: ## BB#0: 4079; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 4080; CHECK-NEXT: retq 4081 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) 4082 ret <2 x double> %res 4083} 4084 4085define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) { 4086; CHECK-LABEL: test_x86_avx512_cvtsi2sd32: 4087; CHECK: ## BB#0: 4088; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 4089; CHECK-NEXT: retq 4090 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1] 4091 ret <2 x double> %res 4092} 4093declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone 4094 4095define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { 4096; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: 4097; CHECK: ## BB#0: 4098; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 4099; CHECK-NEXT: retq 4100 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] 4101 ret <2 x double> %res 4102} 4103declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone 4104 4105define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { 4106; CHECK-LABEL: 
test_x86_avx512_cvtsi2ss32: 4107; CHECK: ## BB#0: 4108; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 4109; CHECK-NEXT: retq 4110 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] 4111 ret <4 x float> %res 4112} 4113declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone 4114 4115define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { 4116; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: 4117; CHECK: ## BB#0: 4118; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 4119; CHECK-NEXT: retq 4120 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] 4121 ret <4 x float> %res 4122} 4123declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone 4124 4125define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) 4126; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: 4127; CHECK: ## BB#0: 4128; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 4129; CHECK-NEXT: retq 4130{ 4131 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 4132 ret <4 x float> %res 4133} 4134 4135define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr) 4136; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: 4137; CHECK: ## BB#0: 4138; CHECK-NEXT: movl (%rdi), %eax 4139; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 4140; CHECK-NEXT: retq 4141{ 4142 %b = load i32, i32* %ptr 4143 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] 4144 ret <4 x float> %res 4145} 4146 4147define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) 4148; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: 4149; CHECK: ## BB#0: 4150; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 4151; CHECK-NEXT: retq 4152{ 4153 %res = call <4 x float> 
@llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 4154 ret <4 x float> %res 4155} 4156 4157define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr) 4158; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: 4159; CHECK: ## BB#0: 4160; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 4161; CHECK-NEXT: retq 4162{ 4163 %b = load i32, i32* %ptr 4164 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] 4165 ret <4 x float> %res 4166} 4167declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone 4168 4169define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) 4170; CHECK-LABEL: _mm_cvt_roundu64_ss: 4171; CHECK: ## BB#0: 4172; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 4173; CHECK-NEXT: retq 4174{ 4175 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] 4176 ret <4 x float> %res 4177} 4178 4179define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) 4180; CHECK-LABEL: _mm_cvtu64_ss: 4181; CHECK: ## BB#0: 4182; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 4183; CHECK-NEXT: retq 4184{ 4185 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] 4186 ret <4 x float> %res 4187} 4188declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone 4189 4190define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) 4191; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd: 4192; CHECK: ## BB#0: 4193; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 4194; CHECK-NEXT: retq 4195{ 4196 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1] 4197 ret <2 x double> %res 4198} 4199declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone 4200 4201define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) 
4202; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd: 4203; CHECK: ## BB#0: 4204; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 4205; CHECK-NEXT: retq 4206{ 4207 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] 4208 ret <2 x double> %res 4209} 4210 4211define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) 4212; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: 4213; CHECK: ## BB#0: 4214; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 4215; CHECK-NEXT: retq 4216{ 4217 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] 4218 ret <2 x double> %res 4219} 4220declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone 4221 4222define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { 4223; CHECK-LABEL: test_vpmaxq: 4224; CHECK: ## BB#0: 4225; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 4226; CHECK-NEXT: retq 4227 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, 4228 <8 x i64>zeroinitializer, i8 -1) 4229 ret <8 x i64> %res 4230} 4231declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4232 4233define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { 4234; CHECK-LABEL: test_vpminud: 4235; CHECK: ## BB#0: 4236; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0 4237; CHECK-NEXT: retq 4238 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, 4239 <16 x i32>zeroinitializer, i16 -1) 4240 ret <16 x i32> %res 4241} 4242declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4243 4244define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { 4245; CHECK-LABEL: test_vpmaxsd: 4246; CHECK: ## BB#0: 4247; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 4248; CHECK-NEXT: retq 4249 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x 
i32> %a1, 4250 <16 x i32>zeroinitializer, i16 -1) 4251 ret <16 x i32> %res 4252} 4253declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4254 4255define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4256; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512: 4257; CHECK: ## BB#0: 4258; CHECK-NEXT: kmovw %edi, %k1 4259; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} 4260; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 4261; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4262; CHECK-NEXT: retq 4263 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4264 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4265 %res2 = add <16 x i32> %res, %res1 4266 ret <16 x i32> %res2 4267} 4268 4269define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4270; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512: 4271; CHECK: ## BB#0: 4272; CHECK-NEXT: movzbl %dil, %eax 4273; CHECK-NEXT: kmovw %eax, %k1 4274; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} 4275; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 4276; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4277; CHECK-NEXT: retq 4278 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4279 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4280 %res2 = add <8 x i64> %res, %res1 4281 ret <8 x i64> %res2 4282} 4283 4284declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4285 4286define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4287; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512: 4288; CHECK: ## BB#0: 4289; CHECK-NEXT: kmovw %edi, %k1 4290; CHECK-NEXT: vpmaxud %zmm1, 
%zmm0, %zmm2 {%k1} 4291; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 4292; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4293; CHECK-NEXT: retq 4294 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4295 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4296 %res2 = add <16 x i32> %res, %res1 4297 ret <16 x i32> %res2 4298} 4299 4300declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4301 4302define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4303; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512: 4304; CHECK: ## BB#0: 4305; CHECK-NEXT: movzbl %dil, %eax 4306; CHECK-NEXT: kmovw %eax, %k1 4307; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} 4308; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 4309; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4310; CHECK-NEXT: retq 4311 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4312 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4313 %res2 = add <8 x i64> %res, %res1 4314 ret <8 x i64> %res2 4315} 4316 4317declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4318 4319define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4320; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512: 4321; CHECK: ## BB#0: 4322; CHECK-NEXT: kmovw %edi, %k1 4323; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} 4324; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0 4325; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4326; CHECK-NEXT: retq 4327 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4328 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x 
i32> %x2, i16 -1) 4329 %res2 = add <16 x i32> %res, %res1 4330 ret <16 x i32> %res2 4331} 4332 4333declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4334 4335define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4336; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512: 4337; CHECK: ## BB#0: 4338; CHECK-NEXT: movzbl %dil, %eax 4339; CHECK-NEXT: kmovw %eax, %k1 4340; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} 4341; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0 4342; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4343; CHECK-NEXT: retq 4344 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4345 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4346 %res2 = add <8 x i64> %res, %res1 4347 ret <8 x i64> %res2 4348} 4349 4350define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4351; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512: 4352; CHECK: ## BB#0: 4353; CHECK-NEXT: kmovw %edi, %k1 4354; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} 4355; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0 4356; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4357; CHECK-NEXT: retq 4358 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4359 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4360 %res2 = add <16 x i32> %res, %res1 4361 ret <16 x i32> %res2 4362} 4363 4364declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4365 4366define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4367; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512: 4368; CHECK: ## BB#0: 4369; CHECK-NEXT: movzbl %dil, %eax 4370; CHECK-NEXT: kmovw %eax, %k1 
4371; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} 4372; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0 4373; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4374; CHECK-NEXT: retq 4375 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4376 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4377 %res2 = add <8 x i64> %res, %res1 4378 ret <8 x i64> %res2 4379} 4380 4381declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4382 4383define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { 4384; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512: 4385; CHECK: ## BB#0: 4386; CHECK-NEXT: kmovw %esi, %k1 4387; CHECK-NEXT: vmovaps %zmm1, %zmm3 4388; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1} 4389; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1 4390; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 4391; CHECK-NEXT: retq 4392 %x2 = load <16 x i32>, <16 x i32>* %x2p 4393 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4394 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) 4395 %res2 = add <16 x i32> %res, %res1 4396 ret <16 x i32> %res2 4397} 4398 4399declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) 4400 4401define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { 4402; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512: 4403; CHECK: ## BB#0: 4404; CHECK-NEXT: movzbl %dil, %eax 4405; CHECK-NEXT: kmovw %eax, %k1 4406; CHECK-NEXT: vmovaps %zmm1, %zmm3 4407; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1} 4408; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 4409; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0 
4410; CHECK-NEXT: retq 4411 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) 4412 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) 4413 %res2 = fadd <8 x double> %res, %res1 4414 ret <8 x double> %res2 4415} 4416 4417declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) 4418 4419define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { 4420; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512: 4421; CHECK: ## BB#0: 4422; CHECK-NEXT: kmovw %edi, %k1 4423; CHECK-NEXT: vmovaps %zmm1, %zmm3 4424; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1} 4425; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 4426; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 4427; CHECK-NEXT: retq 4428 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) 4429 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) 4430 %res2 = fadd <16 x float> %res, %res1 4431 ret <16 x float> %res2 4432} 4433 4434declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4435 4436define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4437; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512: 4438; CHECK: ## BB#0: 4439; CHECK-NEXT: movzbl %dil, %eax 4440; CHECK-NEXT: kmovw %eax, %k1 4441; CHECK-NEXT: vmovaps %zmm1, %zmm3 4442; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1} 4443; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 4444; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 4445; CHECK-NEXT: retq 4446 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4447 %res1 = call 
<8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4448 %res2 = add <8 x i64> %res, %res1 4449 ret <8 x i64> %res2 4450} 4451 4452declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4453 4454define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) { 4455; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512: 4456; CHECK: ## BB#0: 4457; CHECK-NEXT: kmovw %esi, %k1 4458; CHECK-NEXT: vmovaps %zmm1, %zmm2 4459; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} 4460; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1 4461; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0 4462; CHECK-NEXT: retq 4463 %x2 = load <16 x i32>, <16 x i32>* %x2p 4464 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4465 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1) 4466 %res2 = add <16 x i32> %res, %res1 4467 ret <16 x i32> %res2 4468} 4469 4470declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8) 4471 4472define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) { 4473; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512: 4474; CHECK: ## BB#0: 4475; CHECK-NEXT: movzbl %sil, %eax 4476; CHECK-NEXT: kmovw %eax, %k1 4477; CHECK-NEXT: vmovaps %zmm1, %zmm2 4478; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} 4479; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1 4480; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0 4481; CHECK-NEXT: retq 4482 %x2s = load double, double* %x2ptr 4483 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0 4484 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer 4485 %res = call <8 x double> 
@llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 4486 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1) 4487 %res2 = fadd <8 x double> %res, %res1 4488 ret <8 x double> %res2 4489} 4490 4491declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) 4492 4493define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4494; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512: 4495; CHECK: ## BB#0: 4496; CHECK-NEXT: kmovw %edi, %k1 4497; CHECK-NEXT: vmovaps %zmm1, %zmm3 4498; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z} 4499; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1 4500; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0 4501; CHECK-NEXT: retq 4502 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 4503 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 4504 %res2 = fadd <16 x float> %res, %res1 4505 ret <16 x float> %res2 4506} 4507 4508 4509declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4510 4511define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4512; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512: 4513; CHECK: ## BB#0: 4514; CHECK-NEXT: movzbl %dil, %eax 4515; CHECK-NEXT: kmovw %eax, %k1 4516; CHECK-NEXT: vmovaps %zmm1, %zmm3 4517; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z} 4518; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1 4519; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0 4520; CHECK-NEXT: retq 4521 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4522 %res1 = call <8 x i64> 
@llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4523 %res2 = add <8 x i64> %res, %res1 4524 ret <8 x i64> %res2 4525} 4526 4527declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4528 4529define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4530; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512: 4531; CHECK: ## BB#0: 4532; CHECK-NEXT: kmovw %edi, %k1 4533; CHECK-NEXT: vmovaps %zmm1, %zmm3 4534; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1} 4535; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 4536; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0 4537; CHECK-NEXT: retq 4538 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4539 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4540 %res2 = add <16 x i32> %res, %res1 4541 ret <16 x i32> %res2 4542} 4543 4544declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 4545define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 4546; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512: 4547; CHECK: ## BB#0: 4548; CHECK-NEXT: movzbl %dil, %eax 4549; CHECK-NEXT: kmovw %eax, %k1 4550; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} 4551; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0 4552; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4553; CHECK-NEXT: retq 4554 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) 4555 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 4556 %res2 = fadd <8 x double> %res, %res1 4557 ret <8 x double> %res2 4558} 4559 4560declare <16 x 
float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 4561define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4562; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512: 4563; CHECK: ## BB#0: 4564; CHECK-NEXT: kmovw %edi, %k1 4565; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} 4566; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0 4567; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4568; CHECK-NEXT: retq 4569 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) 4570 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 4571 %res2 = fadd <16 x float> %res, %res1 4572 ret <16 x float> %res2 4573} 4574 4575declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) 4576 4577define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { 4578; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512: 4579; CHECK: ## BB#0: 4580; CHECK-NEXT: movzbl %dil, %eax 4581; CHECK-NEXT: kmovw %eax, %k1 4582; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] 4583; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 4584; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 4585; CHECK-NEXT: retq 4586 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) 4587 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) 4588 %res2 = fadd <8 x double> %res, %res1 4589 ret <8 x double> %res2 4590} 4591 4592declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 4593 
; Masked VUNPCKHPS (512-bit): the intrinsic is called once with the runtime
; mask %x3 and once with an all-ones mask (i16 -1); the two results are added,
; so the generated assembly must show both the {%k1}-predicated vunpckhps and
; the unpredicated form, each with the high-half interleave shuffle pattern.
define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

; Masked VUNPCKLPD (512-bit): same masked-plus-unmasked pattern as above, but
; with an i8 mask, so the mask load goes through a byte zero-extend
; (movzbl %dil) before being moved into %k1.
define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

4625declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 4626 4627define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { 4628; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512: 4629; CHECK: ## BB#0: 4630; CHECK-NEXT: kmovw %edi, %k1 4631; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] 4632; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 4633; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 4634; CHECK-NEXT: retq 4635 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) 4636 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) 4637 %res2 = fadd <16 x float> %res, %res1 4638 ret <16 x float> %res2 4639} 4640 4641declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4642 4643define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4644; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512: 4645; CHECK: ## BB#0: 4646; CHECK-NEXT: movzbl %dil, %eax 4647; CHECK-NEXT: kmovw %eax, %k1 4648; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6] 4649; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6] 4650; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] 4651; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4652; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 4653; CHECK-NEXT: retq 4654 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x 
i64> %x2, i8 %x3) 4655 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4656 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3) 4657 %res3 = add <8 x i64> %res, %res1 4658 %res4 = add <8 x i64> %res2, %res3 4659 ret <8 x i64> %res4 4660} 4661 4662declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 4663 4664define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { 4665; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512: 4666; CHECK: ## BB#0: 4667; CHECK-NEXT: movzbl %dil, %eax 4668; CHECK-NEXT: kmovw %eax, %k1 4669; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7] 4670; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] 4671; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 4672; CHECK-NEXT: retq 4673 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) 4674 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) 4675 %res2 = add <8 x i64> %res, %res1 4676 ret <8 x i64> %res2 4677} 4678 4679declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4680 4681define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4682; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512: 4683; CHECK: ## BB#0: 4684; CHECK-NEXT: kmovw %edi, %k1 4685; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15] 4686; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] 4687; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4688; CHECK-NEXT: retq 4689 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4690 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4691 %res2 = add <16 x i32> %res, %res1 4692 ret <16 x i32> %res2 4693} 4694 4695declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 4696 4697define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { 4698; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512: 4699; CHECK: ## BB#0: 4700; CHECK-NEXT: kmovw %edi, %k1 4701; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13] 4702; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] 4703; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 4704; CHECK-NEXT: retq 4705 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) 4706 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) 4707 %res2 = add <16 x i32> %res, %res1 4708 ret <16 x i32> %res2 4709} 4710 4711declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) 4712 4713define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4714; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: 4715; CHECK: ## BB#0: 4716; CHECK-NEXT: kmovw %edi, %k1 4717; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1} 4718; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} 4719; 
CHECK-NEXT: vpmovqb %zmm0, %xmm0 4720; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4721; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4722; CHECK-NEXT: retq 4723 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4724 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4725 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4726 %res3 = add <16 x i8> %res0, %res1 4727 %res4 = add <16 x i8> %res3, %res2 4728 ret <16 x i8> %res4 4729} 4730 4731declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4732 4733define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4734; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: 4735; CHECK: ## BB#0: 4736; CHECK-NEXT: movzbl %sil, %eax 4737; CHECK-NEXT: kmovw %eax, %k1 4738; CHECK-NEXT: vpmovqb %zmm0, (%rdi) 4739; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1} 4740; CHECK-NEXT: retq 4741 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4742 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4743 ret void 4744} 4745 4746declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) 4747 4748define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4749; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: 4750; CHECK: ## BB#0: 4751; CHECK-NEXT: kmovw %edi, %k1 4752; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1} 4753; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} 4754; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 4755; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4756; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4757; CHECK-NEXT: retq 4758 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4759 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4760 %res2 = call <16 x i8> 
@llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4761 %res3 = add <16 x i8> %res0, %res1 4762 %res4 = add <16 x i8> %res3, %res2 4763 ret <16 x i8> %res4 4764} 4765 4766declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4767 4768define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4769; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: 4770; CHECK: ## BB#0: 4771; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) 4772; CHECK-NEXT: kmovw %esi, %k1 4773; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} 4774; CHECK-NEXT: retq 4775 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4776 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4777 ret void 4778} 4779 4780declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) 4781 4782define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { 4783; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: 4784; CHECK: ## BB#0: 4785; CHECK-NEXT: kmovw %edi, %k1 4786; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1} 4787; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} 4788; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 4789; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 4790; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 4791; CHECK-NEXT: retq 4792 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) 4793 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) 4794 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 4795 %res3 = add <16 x i8> %res0, %res1 4796 %res4 = add <16 x i8> %res3, %res2 4797 ret <16 x i8> %res4 4798} 4799 4800declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) 4801 4802define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4803; CHECK-LABEL: 
test_int_x86_avx512_mask_pmovus_qb_mem_512: 4804; CHECK: ## BB#0: 4805; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) 4806; CHECK-NEXT: kmovw %esi, %k1 4807; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1} 4808; CHECK-NEXT: retq 4809 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4810 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4811 ret void 4812} 4813 4814declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) 4815 4816define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4817; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: 4818; CHECK: ## BB#0: 4819; CHECK-NEXT: movzbl %dil, %eax 4820; CHECK-NEXT: kmovw %eax, %k1 4821; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1} 4822; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} 4823; CHECK-NEXT: vpmovqw %zmm0, %xmm0 4824; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4825; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4826; CHECK-NEXT: retq 4827 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4828 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4829 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4830 %res3 = add <8 x i16> %res0, %res1 4831 %res4 = add <8 x i16> %res3, %res2 4832 ret <8 x i16> %res4 4833} 4834 4835declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4836 4837define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4838; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: 4839; CHECK: ## BB#0: 4840; CHECK-NEXT: movzbl %sil, %eax 4841; CHECK-NEXT: kmovw %eax, %k1 4842; CHECK-NEXT: vpmovqw %zmm0, (%rdi) 4843; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1} 4844; CHECK-NEXT: retq 4845 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4846 call void 
@llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4847 ret void 4848} 4849 4850declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) 4851 4852define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4853; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: 4854; CHECK: ## BB#0: 4855; CHECK-NEXT: movzbl %dil, %eax 4856; CHECK-NEXT: kmovw %eax, %k1 4857; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1} 4858; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} 4859; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 4860; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4861; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4862; CHECK-NEXT: retq 4863 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4864 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4865 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4866 %res3 = add <8 x i16> %res0, %res1 4867 %res4 = add <8 x i16> %res3, %res2 4868 ret <8 x i16> %res4 4869} 4870 4871declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4872 4873define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4874; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: 4875; CHECK: ## BB#0: 4876; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) 4877; CHECK-NEXT: kmovw %esi, %k1 4878; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} 4879; CHECK-NEXT: retq 4880 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4881 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4882 ret void 4883} 4884 4885declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) 4886 4887define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { 4888; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: 4889; CHECK: ## BB#0: 4890; 
CHECK-NEXT: movzbl %dil, %eax 4891; CHECK-NEXT: kmovw %eax, %k1 4892; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1} 4893; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} 4894; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 4895; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 4896; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 4897; CHECK-NEXT: retq 4898 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) 4899 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) 4900 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 4901 %res3 = add <8 x i16> %res0, %res1 4902 %res4 = add <8 x i16> %res3, %res2 4903 ret <8 x i16> %res4 4904} 4905 4906declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) 4907 4908define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4909; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512: 4910; CHECK: ## BB#0: 4911; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) 4912; CHECK-NEXT: kmovw %esi, %k1 4913; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1} 4914; CHECK-NEXT: retq 4915 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4916 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4917 ret void 4918} 4919 4920declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) 4921 4922define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4923; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: 4924; CHECK: ## BB#0: 4925; CHECK-NEXT: movzbl %dil, %eax 4926; CHECK-NEXT: kmovw %eax, %k1 4927; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1} 4928; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} 4929; CHECK-NEXT: vpmovqd %zmm0, %ymm0 4930; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4931; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4932; CHECK-NEXT: retq 4933 %res0 = call <8 x i32> 
@llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4934 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4935 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4936 %res3 = add <8 x i32> %res0, %res1 4937 %res4 = add <8 x i32> %res3, %res2 4938 ret <8 x i32> %res4 4939} 4940 4941declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4942 4943define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4944; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512: 4945; CHECK: ## BB#0: 4946; CHECK-NEXT: movzbl %sil, %eax 4947; CHECK-NEXT: kmovw %eax, %k1 4948; CHECK-NEXT: vpmovqd %zmm0, (%rdi) 4949; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1} 4950; CHECK-NEXT: retq 4951 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4952 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4953 ret void 4954} 4955 4956declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) 4957 4958define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4959; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: 4960; CHECK: ## BB#0: 4961; CHECK-NEXT: movzbl %dil, %eax 4962; CHECK-NEXT: kmovw %eax, %k1 4963; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1} 4964; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} 4965; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 4966; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 4967; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 4968; CHECK-NEXT: retq 4969 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 4970 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 4971 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 4972 %res3 = add <8 x i32> %res0, %res1 4973 %res4 = add <8 x 
i32> %res3, %res2 4974 ret <8 x i32> %res4 4975} 4976 4977declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) 4978 4979define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 4980; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: 4981; CHECK: ## BB#0: 4982; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) 4983; CHECK-NEXT: kmovw %esi, %k1 4984; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1} 4985; CHECK-NEXT: retq 4986 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 4987 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 4988 ret void 4989} 4990 4991declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) 4992 4993define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { 4994; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: 4995; CHECK: ## BB#0: 4996; CHECK-NEXT: movzbl %dil, %eax 4997; CHECK-NEXT: kmovw %eax, %k1 4998; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1} 4999; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} 5000; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 5001; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 5002; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 5003; CHECK-NEXT: retq 5004 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) 5005 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) 5006 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) 5007 %res3 = add <8 x i32> %res0, %res1 5008 %res4 = add <8 x i32> %res3, %res2 5009 ret <8 x i32> %res4 5010} 5011 5012declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) 5013 5014define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { 5015; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: 5016; CHECK: ## BB#0: 5017; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) 
5018; CHECK-NEXT: kmovw %esi, %k1 5019; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1} 5020; CHECK-NEXT: retq 5021 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) 5022 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) 5023 ret void 5024} 5025 5026declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) 5027 5028define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5029; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: 5030; CHECK: ## BB#0: 5031; CHECK-NEXT: kmovw %edi, %k1 5032; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1} 5033; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} 5034; CHECK-NEXT: vpmovdb %zmm0, %xmm0 5035; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5036; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5037; CHECK-NEXT: retq 5038 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5039 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5040 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5041 %res3 = add <16 x i8> %res0, %res1 5042 %res4 = add <16 x i8> %res3, %res2 5043 ret <16 x i8> %res4 5044} 5045 5046declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16) 5047 5048define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5049; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512: 5050; CHECK: ## BB#0: 5051; CHECK-NEXT: kmovw %esi, %k1 5052; CHECK-NEXT: vpmovdb %zmm0, (%rdi) 5053; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1} 5054; CHECK-NEXT: retq 5055 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5056 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5057 ret void 5058} 5059 5060declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) 5061 5062define <16 
x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5063; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: 5064; CHECK: ## BB#0: 5065; CHECK-NEXT: kmovw %edi, %k1 5066; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1} 5067; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} 5068; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 5069; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5070; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5071; CHECK-NEXT: retq 5072 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5073 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5074 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5075 %res3 = add <16 x i8> %res0, %res1 5076 %res4 = add <16 x i8> %res3, %res2 5077 ret <16 x i8> %res4 5078} 5079 5080declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) 5081 5082define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5083; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: 5084; CHECK: ## BB#0: 5085; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) 5086; CHECK-NEXT: kmovw %esi, %k1 5087; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1} 5088; CHECK-NEXT: retq 5089 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5090 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5091 ret void 5092} 5093 5094declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) 5095 5096define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { 5097; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: 5098; CHECK: ## BB#0: 5099; CHECK-NEXT: kmovw %edi, %k1 5100; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1} 5101; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} 5102; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 5103; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 5104; 
CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 5105; CHECK-NEXT: retq 5106 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) 5107 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) 5108 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) 5109 %res3 = add <16 x i8> %res0, %res1 5110 %res4 = add <16 x i8> %res3, %res2 5111 ret <16 x i8> %res4 5112} 5113 5114declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) 5115 5116define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5117; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: 5118; CHECK: ## BB#0: 5119; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) 5120; CHECK-NEXT: kmovw %esi, %k1 5121; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1} 5122; CHECK-NEXT: retq 5123 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5124 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5125 ret void 5126} 5127 5128declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) 5129 5130define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5131; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: 5132; CHECK: ## BB#0: 5133; CHECK-NEXT: kmovw %edi, %k1 5134; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1} 5135; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} 5136; CHECK-NEXT: vpmovdw %zmm0, %ymm0 5137; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5138; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5139; CHECK-NEXT: retq 5140 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5141 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5142 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 
5143 %res3 = add <16 x i16> %res0, %res1 5144 %res4 = add <16 x i16> %res3, %res2 5145 ret <16 x i16> %res4 5146} 5147 5148declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5149 5150define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5151; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512: 5152; CHECK: ## BB#0: 5153; CHECK-NEXT: kmovw %esi, %k1 5154; CHECK-NEXT: vpmovdw %zmm0, (%rdi) 5155; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1} 5156; CHECK-NEXT: retq 5157 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5158 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5159 ret void 5160} 5161 5162declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16) 5163 5164define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5165; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: 5166; CHECK: ## BB#0: 5167; CHECK-NEXT: kmovw %edi, %k1 5168; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1} 5169; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} 5170; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 5171; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5172; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5173; CHECK-NEXT: retq 5174 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5175 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5176 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 5177 %res3 = add <16 x i16> %res0, %res1 5178 %res4 = add <16 x i16> %res3, %res2 5179 ret <16 x i16> %res4 5180} 5181 5182declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5183 5184define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5185; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: 5186; CHECK: ## BB#0: 
5187; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) 5188; CHECK-NEXT: kmovw %esi, %k1 5189; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1} 5190; CHECK-NEXT: retq 5191 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5192 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5193 ret void 5194} 5195 5196declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) 5197 5198define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { 5199; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: 5200; CHECK: ## BB#0: 5201; CHECK-NEXT: kmovw %edi, %k1 5202; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1} 5203; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} 5204; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 5205; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 5206; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 5207; CHECK-NEXT: retq 5208 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) 5209 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) 5210 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) 5211 %res3 = add <16 x i16> %res0, %res1 5212 %res4 = add <16 x i16> %res3, %res2 5213 ret <16 x i16> %res4 5214} 5215 5216declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) 5217 5218define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { 5219; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: 5220; CHECK: ## BB#0: 5221; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) 5222; CHECK-NEXT: kmovw %esi, %k1 5223; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1} 5224; CHECK-NEXT: retq 5225 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) 5226 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) 5227 ret void 5228} 5229 5230declare <8 x 
double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) 5231 5232define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 5233; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512: 5234; CHECK: ## BB#0: 5235; CHECK-NEXT: movzbl %dil, %eax 5236; CHECK-NEXT: kmovw %eax, %k1 5237; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} 5238; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 5239; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5240; CHECK-NEXT: retq 5241 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 5242 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 5243 %res2 = fadd <8 x double> %res, %res1 5244 ret <8 x double> %res2 5245} 5246 5247declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) 5248 5249define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 5250; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512: 5251; CHECK: ## BB#0: 5252; CHECK-NEXT: kmovw %edi, %k1 5253; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} 5254; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 5255; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5256; CHECK-NEXT: retq 5257 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 5258 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 5259 %res2 = fadd <16 x float> %res, %res1 5260 ret <16 x float> %res2 5261} 5262 5263declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 5264 5265define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5266; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512: 5267; CHECK: ## BB#0: 5268; CHECK-NEXT: movzbl %dil, %eax 5269; CHECK-NEXT: kmovw %eax, %k1 5270; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1} 5271; 
CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0 5272; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5273; CHECK-NEXT: retq 5274 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5275 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 5276 %res2 = add <8 x i32> %res, %res1 5277 ret <8 x i32> %res2 5278} 5279 5280declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) 5281 5282define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) { 5283; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512: 5284; CHECK: ## BB#0: 5285; CHECK-NEXT: movzbl %dil, %eax 5286; CHECK-NEXT: kmovw %eax, %k1 5287; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1} 5288; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0 5289; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 5290; CHECK-NEXT: retq 5291 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4) 5292 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2) 5293 %res2 = fadd <8 x float> %res, %res1 5294 ret <8 x float> %res2 5295} 5296 5297declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 5298 5299define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5300; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512: 5301; CHECK: ## BB#0: 5302; CHECK-NEXT: movzbl %dil, %eax 5303; CHECK-NEXT: kmovw %eax, %k1 5304; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1} 5305; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0 5306; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5307; CHECK-NEXT: retq 5308 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2) 5309 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0) 5310 
%res2 = add <8 x i32> %res, %res1 5311 ret <8 x i32> %res2 5312} 5313 5314declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32) 5315 5316define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5317; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512: 5318; CHECK: ## BB#0: 5319; CHECK-NEXT: kmovw %edi, %k1 5320; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1} 5321; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0 5322; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5323; CHECK-NEXT: retq 5324 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 5325 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 5326 %res2 = add <16 x i32> %res, %res1 5327 ret <16 x i32> %res2 5328} 5329 5330declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32) 5331 5332define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) { 5333; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512: 5334; CHECK: ## BB#0: 5335; CHECK-NEXT: movzbl %dil, %eax 5336; CHECK-NEXT: kmovw %eax, %k1 5337; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1} 5338; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0 5339; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5340; CHECK-NEXT: retq 5341 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4) 5342 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8) 5343 %res2 = fadd <8 x double> %res, %res1 5344 ret <8 x double> %res2 5345} 5346 5347declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) 5348 5349define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5350; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512: 5351; CHECK: ## BB#0: 5352; 
CHECK-NEXT: kmovw %edi, %k1 5353; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1} 5354; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0 5355; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5356; CHECK-NEXT: retq 5357 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2) 5358 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0) 5359 %res2 = add <16 x i32> %res, %res1 5360 ret <16 x i32> %res2 5361} 5362 5363declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32) 5364 5365define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5366; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512: 5367; CHECK: ## BB#0: 5368; CHECK-NEXT: movzbl %dil, %eax 5369; CHECK-NEXT: kmovw %eax, %k1 5370; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} 5371; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 5372; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5373; CHECK-NEXT: retq 5374 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5375 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 5376 %res2 = add <8 x i32> %res, %res1 5377 ret <8 x i32> %res2 5378} 5379 5380declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) 5381 5382define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { 5383; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512: 5384; CHECK: ## BB#0: 5385; CHECK-NEXT: movzbl %dil, %eax 5386; CHECK-NEXT: kmovw %eax, %k1 5387; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} 5388; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 5389; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5390; CHECK-NEXT: retq 5391 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) 5392 %res1 = call <8 x double> 
@llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) 5393 %res2 = fadd <8 x double> %res, %res1 5394 ret <8 x double> %res2 5395} 5396 5397 5398declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) 5399 5400define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) { 5401; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512: 5402; CHECK: ## BB#0: 5403; CHECK-NEXT: kmovw %edi, %k1 5404; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} 5405; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 5406; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5407; CHECK-NEXT: retq 5408 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) 5409 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0) 5410 %res2 = fadd <16 x float> %res, %res1 5411 ret <16 x float> %res2 5412} 5413 5414declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 5415 5416define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) { 5417; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: 5418; CHECK: ## BB#0: 5419; CHECK-NEXT: movzbl %dil, %eax 5420; CHECK-NEXT: kmovw %eax, %k1 5421; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} 5422; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 5423; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 5424; CHECK-NEXT: retq 5425 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) 5426 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) 5427 %res2 = add <8 x i32> %res, %res1 5428 ret <8 x i32> %res2 5429} 5430 5431declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32) 5432 5433define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) 
{ 5434; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512: 5435; CHECK: ## BB#0: 5436; CHECK-NEXT: kmovw %edi, %k1 5437; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1} 5438; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0 5439; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5440; CHECK-NEXT: retq 5441 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 5442 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 5443 %res2 = add <16 x i32> %res, %res1 5444 ret <16 x i32> %res2 5445} 5446 5447declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32) 5448 5449define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) { 5450; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512: 5451; CHECK: ## BB#0: 5452; CHECK-NEXT: kmovw %edi, %k1 5453; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1} 5454; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0 5455; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 5456; CHECK-NEXT: retq 5457 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) 5458 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) 5459 %res2 = add <16 x i32> %res, %res1 5460 ret <16 x i32> %res2 5461} 5462 5463 5464declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) 5465define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { 5466; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: 5467; CHECK: ## BB#0: 5468; CHECK-NEXT: andl $1, %edi 5469; CHECK-NEXT: kmovw %edi, %k1 5470; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1} 5471; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 5472; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 5473; CHECK-NEXT: retq 5474 %res = call <4 x float> 
@llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) 5475 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) 5476 %res2 = fadd <4 x float> %res, %res1 5477 ret <4 x float> %res2 5478} 5479 5480declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) 5481define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 5482; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: 5483; CHECK: ## BB#0: 5484; CHECK-NEXT: andl $1, %edi 5485; CHECK-NEXT: kmovw %edi, %k1 5486; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1} 5487; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 5488; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 5489; CHECK-NEXT: retq 5490 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) 5491 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) 5492 %res2 = fadd <2 x double> %res, %res1 5493 ret <2 x double> %res2 5494} 5495 5496declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone 5497 5498define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 5499; CHECK-LABEL: test_getexp_ss: 5500; CHECK: ## BB#0: 5501; CHECK-NEXT: andl $1, %edi 5502; CHECK-NEXT: kmovw %edi, %k1 5503; CHECK-NEXT: vmovaps %zmm2, %zmm3 5504; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1} 5505; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1} 5506; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z} 5507; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0 5508; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1 5509; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0 5510; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 5511; CHECK-NEXT: retq 5512 %res0 = 
call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) 5513 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) 5514 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) 5515 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) 5516 5517 %res.1 = fadd <4 x float> %res0, %res1 5518 %res.2 = fadd <4 x float> %res2, %res3 5519 %res = fadd <4 x float> %res.1, %res.2 5520 ret <4 x float> %res 5521} 5522 5523declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone 5524 5525define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 5526; CHECK-LABEL: test_getexp_sd: 5527; CHECK: ## BB#0: 5528; CHECK-NEXT: andl $1, %edi 5529; CHECK-NEXT: kmovw %edi, %k1 5530; CHECK-NEXT: vmovaps %zmm2, %zmm3 5531; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1} 5532; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4 5533; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1} 5534; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z} 5535; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1 5536; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 5537; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 5538; CHECK-NEXT: retq 5539 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) 5540 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) 5541 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) 5542 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x 
double> zeroinitializer, i8 -1, i32 4) 5543 5544 %res.1 = fadd <2 x double> %res0, %res1 5545 %res.2 = fadd <2 x double> %res2, %res3 5546 %res = fadd <2 x double> %res.1, %res.2 5547 ret <2 x double> %res 5548} 5549 5550declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32) 5551 5552define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 5553; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd: 5554; CHECK: ## BB#0: 5555; CHECK-NEXT: andl $1, %edi 5556; CHECK-NEXT: kmovw %edi, %k1 5557; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1} 5558; CHECK-NEXT: kmovw %k0, %eax 5559; CHECK-NEXT: shlb $7, %al 5560; CHECK-NEXT: sarb $7, %al 5561; CHECK-NEXT: retq 5562 5563 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) 5564 ret i8 %res4 5565} 5566 5567define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) { 5568; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all: 5569; CHECK: ## BB#0: 5570; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0 5571; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1 5572; CHECK-NEXT: korw %k0, %k1, %k0 5573; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1 5574; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2 5575; CHECK-NEXT: korw %k1, %k2, %k1 5576; CHECK-NEXT: andl $1, %edi 5577; CHECK-NEXT: kmovw %edi, %k2 5578; CHECK-NEXT: kandw %k2, %k1, %k1 5579; CHECK-NEXT: korw %k1, %k0, %k0 5580; CHECK-NEXT: kmovw %k0, %eax 5581; CHECK-NEXT: shlb $7, %al 5582; CHECK-NEXT: sarb $7, %al 5583; CHECK-NEXT: retq 5584 5585 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4) 5586 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8) 5587 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4) 5588 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, 
i32 5, i8 %x3, i32 8) 5589 5590 %res11 = or i8 %res1, %res2 5591 %res12 = or i8 %res3, %res4 5592 %res13 = or i8 %res11, %res12 5593 ret i8 %res13 5594} 5595 5596declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) 5597 5598define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 5599; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss: 5600; CHECK: ## BB#0: 5601; CHECK-NEXT: andl $1, %edi 5602; CHECK-NEXT: kmovw %edi, %k1 5603; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1} 5604; CHECK-NEXT: kmovw %k0, %eax 5605; CHECK-NEXT: shlb $7, %al 5606; CHECK-NEXT: sarb $7, %al 5607; CHECK-NEXT: retq 5608 5609 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4) 5610 ret i8 %res2 5611} 5612 5613 5614define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) { 5615; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all: 5616; CHECK: ## BB#0: 5617; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1 5618; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1} 5619; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1 5620; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1} 5621; CHECK-NEXT: andl $1, %edi 5622; CHECK-NEXT: kmovw %edi, %k2 5623; CHECK-NEXT: kandw %k2, %k1, %k1 5624; CHECK-NEXT: kandw %k1, %k0, %k0 5625; CHECK-NEXT: kmovw %k0, %eax 5626; CHECK-NEXT: shlb $7, %al 5627; CHECK-NEXT: sarb $7, %al 5628; CHECK-NEXT: retq 5629 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4) 5630 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8) 5631 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4) 5632 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8) 5633 5634 %res11 = and i8 %res1, %res2 5635 %res12 = and i8 %res3, %res4 5636 %res13 = and i8 %res11, %res12 5637 ret i8 
%res13 5638} 5639 5640declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16) 5641 5642define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { 5643; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4: 5644; CHECK: ## BB#0: 5645; CHECK-NEXT: kmovw %edi, %k1 5646; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5647; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5648; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 5649; CHECK-NEXT: retq 5650 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4) 5651 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1) 5652 %res2 = fadd <16 x float> %res, %res1 5653 ret <16 x float> %res2 5654} 5655 5656declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8) 5657 5658define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { 5659; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2: 5660; CHECK: ## BB#0: 5661; CHECK-NEXT: movzbl %dil, %eax 5662; CHECK-NEXT: kmovw %eax, %k1 5663; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5664; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5665; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5666; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 5667; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0 5668; CHECK-NEXT: retq 5669 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) 5670 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) 5671 %res2 = call <8 x double> 
@llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4) 5672 5673 %res3 = fadd <8 x double> %res, %res1 5674 %res4 = fadd <8 x double> %res3, %res2 5675 ret <8 x double> %res4 5676} 5677 5678declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) 5679 5680define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) { 5681; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4: 5682; CHECK: ## BB#0: 5683; CHECK-NEXT: kmovw %edi, %k1 5684; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5685; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] 5686; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 5687; CHECK-NEXT: retq 5688 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4) 5689 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1) 5690 %res2 = add <16 x i32> %res, %res1 5691 ret <16 x i32> %res2 5692} 5693 5694declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) 5695 5696define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) { 5697; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2: 5698; CHECK: ## BB#0: 5699; CHECK-NEXT: movzbl %dil, %eax 5700; CHECK-NEXT: kmovw %eax, %k1 5701; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5702; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1] 5703; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 5704; CHECK-NEXT: retq 5705 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4) 5706 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1) 5707 %res2 = add <8 
x i64> %res, %res1 5708 ret <8 x i64> %res2 5709} 5710 5711declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 5712 5713define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { 5714; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512: 5715; CHECK: ## BB#0: 5716; CHECK-NEXT: movzbl %dil, %eax 5717; CHECK-NEXT: kmovw %eax, %k1 5718; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1} 5719; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0 5720; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 5721; CHECK-NEXT: retq 5722 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4) 5723 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8) 5724 %res2 = fadd <8 x double> %res, %res1 5725 ret <8 x double> %res2 5726} 5727 5728declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 5729 5730define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { 5731; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512: 5732; CHECK: ## BB#0: 5733; CHECK-NEXT: kmovw %edi, %k1 5734; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1} 5735; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0 5736; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 5737; CHECK-NEXT: retq 5738 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4) 5739 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8) 5740 %res2 = fadd <16 x float> %res, %res1 5741 ret <16 x float> %res2 5742} 5743 5744declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32) 5745 5746define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 
x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
  %res11 = fadd <2 x double> %res, %res1
  %res12 = fadd <2 x double> %res2, %res3
  %res13 = fadd <2 x double> %res11, %res12
  ret <2 x double> %res13
}

declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
  %res11 = fadd <4 x float> %res, %res1
  %res12 = fadd <4 x float> %res2, %res3
  %res13 = fadd <4 x float> %res11, %res12
  ret <4 x float> %res13
}

declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)

  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6]
; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6]
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)

define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
;
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
  ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
  ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
  ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
  ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
  ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
  ret i32 %res
}

define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
  ret i32 %res
}

define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)

define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
  %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}
