; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastss %xmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm1, %zmm2, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}
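
; The broadcast tests above all follow the same pattern: one unmasked call
; (mask = -1), one merge-masked call, and one zero-masked call, with the
; results summed so none of the calls is dead. Semantically each of these
; legacy intrinsics is a broadcast of element 0 plus a mask select; a sketch
; in generic IR (a sketch only, the names are illustrative):
;   %bc  = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
;   %m   = bitcast i16 %mask to <16 x i1>
;   %res = select <16 x i1> %m, <16 x float> %bc, <16 x float> %passthru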

declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}
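
; vmovsldup/vmovshdup duplicate the even/odd float lanes and vmovddup the
; even double lanes; the bracketed lane lists in the CHECK lines (e.g.
; zmm0[0,0,2,2,...]) are the shuffle masks printed as assembler comments.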

declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermpd {{.*#+}} zmm2 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermq {{.*#+}} zmm2 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}
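
; For vpermpd/vpermq the immediate is decoded two bits per element within
; each 256-bit half, so i32 3 (0b00000011) selects element 3 followed by
; element 0 three times, giving the [3,0,0,0,7,4,4,4] order checked above.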

define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; CHECK-LABEL: test_store1:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovups %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16)

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; CHECK-LABEL: test_store2:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovupd %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; CHECK-LABEL: test_mask_store_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovaps %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16)

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; CHECK-LABEL: test_mask_store_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovapd %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
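
; The masked store intrinsics above match the generic llvm.masked.store
; semantics with a bitcast lane mask. A sketch (overload name from this
; LLVM era; alignment 1 models the unaligned storeu forms, 64 the aligned
; store forms):
;   %m = bitcast i16 %mask to <16 x i1>
;   call void @llvm.masked.store.v16f32(<16 x float> %data, <16 x float>* %p, i32 1, <16 x i1> %m)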

define void @test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovdqu64 %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovdqu32 %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, (%rsi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovaps (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovups (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovapd (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovupd (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovupd (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)
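
; The load tests chain the unmasked result into the merge-masked call as the
; passthru, so the same address is loaded plain, merged into %zmm0, and
; zero-masked. The generic equivalent is llvm.masked.load; a sketch
; (overload name and alignment illustrative):
;   %m = bitcast i16 %mask to <16 x i1>
;   %v = call <16 x float> @llvm.masked.load.v16f32(<16 x float>* %p, i32 1, <16 x i1> %m, <16 x float> %passthru)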

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqu32 (%rsi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0
; CHECK-NEXT: kmovw %edx, %k1
; CHECK-NEXT: vmovdqu64 (%rsi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1}
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}
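
; The compare intrinsics below return the comparison as a compact bitmask
; rather than a vector; in generic IR that is an icmp plus a bitcast, e.g.:
;   %c = icmp eq <16 x i32> %a, %b
;   %m = bitcast <16 x i1> %c to i16
; The "## kill" lines in the checks are register-liveness annotations for
; returning the i16/i8 result in the low bits of %eax, not instructions.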

define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
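
; The unpck tests below exercise only the merge-masked and unmasked forms;
; the interleaved lane pattern in the CHECK lines (zmm0[1],zmm1[1],...) is
; the usual unpack-high/unpack-low ordering within each 128-bit lane.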

declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpckhps {{.*#+}} zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vunpcklps {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckldq {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
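
; The immediate shift intrinsics take the count as an i32 constant alongside
; the usual passthru and mask operands; with mask = -1 and a zero passthru
; they fold to a single unmasked vpslld/vpsllq/vpsrld/vpsrlq/vpsrad/vpsraq.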

define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
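
; The non-temporal stores carry no mask; their generic-IR equivalent is a
; plain store tagged with !nontemporal metadata, sketched here (metadata id
; illustrative):
;   store <8 x i64> %data, <8 x i64>* %p, align 64, !nontemporal !1
;   !1 = !{i32 1}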

declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)

define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
  ret void
}

declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)

define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntpd %zmm0, (%rdi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
  ret void
}

declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)

define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntps %zmm0, (%rdi)
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
  ret void
}
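
; The masked logical-op tests below pass a real %passThru vector, so the
; merge-masked form computes vpxord/vpord/vpandd into %zmm2 under %k1 and
; then copies the blended result back to %zmm0.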

define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_xor_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_or_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_epi32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_or_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_and_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi64:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)