; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; Codegen and MC-encoding tests for masked AVX-512 intrinsics. llc is run for
; x86_64-apple-darwin with -mcpu=knl plus +avx512dq and +avx512vl, and the
; expected assembly and encoding bytes are pinned by the generated assertions
; inside each function.
;
; The tests for one intrinsic at one vector width come in up to nine forms,
; identified by the suffix in the test name:
;   rr   - both operands are passed as vector arguments (register/register)
;   rm   - the second operand is a full-vector load from %ptr_b
;   rmb  - the second operand is a scalar load splatted with
;          insertelement + shufflevector; the assertions expect it to fold
;          into an embedded {1toN} broadcast memory operand
; followed by the masking form:
;   (none) - the mask is the constant -1; an unmasked instruction is expected
;   k      - %mask is moved into %k1 and the instruction is expected to write
;            the register holding %passThru under {%k1}, which is then copied
;            to the return register
;   kz     - %mask with a zeroinitializer pass-through; the {%k1} {z} form is
;            expected
;
; NOTE(review): the k-form assertions for 128- and 256-bit vectors expect the
; result copy to be a full-width "vmovaps %zmmN, %zmm0". This is what the
; generator recorded; presumably it reflects how register copies are lowered
; for this CPU/feature combination - confirm when regenerating the assertions.

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.pmull.q.512 -> vpmullq on zmm, i8 mask
; ---------------------------------------------------------------------------

define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rm_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rmb_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.pmull.q.256 -> vpmullq on ymm, i8 mask
; ---------------------------------------------------------------------------

define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rm_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rmb_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.pmull.q.128 -> vpmullq on xmm, i8 mask
; ---------------------------------------------------------------------------

define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi64_rmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.andn.ps.128 -> vandnps on xmm, i8 mask
; ---------------------------------------------------------------------------

define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.andn.ps.256 -> vandnps on ymm, i8 mask
; ---------------------------------------------------------------------------

define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rm_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rmb_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbk_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbkz_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.andn.ps.512 -> vandnps on zmm, i16 mask (kmovw)
; ---------------------------------------------------------------------------

define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_andnot_ps_rr_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rrkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rm_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_ps_rmb_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbk_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_andnot_ps_rmbkz_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT: vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

; ---------------------------------------------------------------------------
; llvm.x86.avx512.mask.and.ps.128 -> vandps on xmm, i8 mask
; ---------------------------------------------------------------------------

define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rm_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmk_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmkz_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rmb_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32
0 697 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 698 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 699 ret <4 x float> %res 700} 701 702define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 703; CHECK-LABEL: test_mask_and_ps_rmbk_128: 704; CHECK: ## BB#0: 705; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 706; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f] 707; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 708; CHECK-NEXT: retq ## encoding: [0xc3] 709 %q = load float, float* %ptr_b 710 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 711 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 712 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 713 ret <4 x float> %res 714} 715 716define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 717; CHECK-LABEL: test_mask_and_ps_rmbkz_128: 718; CHECK: ## BB#0: 719; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 720; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07] 721; CHECK-NEXT: retq ## encoding: [0xc3] 722 %q = load float, float* %ptr_b 723 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 724 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 725 %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 726 ret <4 x float> %res 727} 728 729declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 730 731define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> 
%b) { 732; CHECK-LABEL: test_mask_and_ps_rr_256: 733; CHECK: ## BB#0: 734; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0xc1] 735; CHECK-NEXT: retq ## encoding: [0xc3] 736 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 737 ret <8 x float> %res 738} 739 740define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 741; CHECK-LABEL: test_mask_and_ps_rrk_256: 742; CHECK: ## BB#0: 743; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 744; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] 745; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 746; CHECK-NEXT: retq ## encoding: [0xc3] 747 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 748 ret <8 x float> %res 749} 750 751define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 752; CHECK-LABEL: test_mask_and_ps_rrkz_256: 753; CHECK: ## BB#0: 754; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 755; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1] 756; CHECK-NEXT: retq ## encoding: [0xc3] 757 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 758 ret <8 x float> %res 759} 760 761define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 762; CHECK-LABEL: test_mask_and_ps_rm_256: 763; CHECK: ## BB#0: 764; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0x07] 765; CHECK-NEXT: retq ## encoding: [0xc3] 766 %b = load <8 x float>, <8 x float>* %ptr_b 767 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 768 ret <8 x 
float> %res 769} 770 771define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 772; CHECK-LABEL: test_mask_and_ps_rmk_256: 773; CHECK: ## BB#0: 774; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 775; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f] 776; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 777; CHECK-NEXT: retq ## encoding: [0xc3] 778 %b = load <8 x float>, <8 x float>* %ptr_b 779 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 780 ret <8 x float> %res 781} 782 783define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 784; CHECK-LABEL: test_mask_and_ps_rmkz_256: 785; CHECK: ## BB#0: 786; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 787; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07] 788; CHECK-NEXT: retq ## encoding: [0xc3] 789 %b = load <8 x float>, <8 x float>* %ptr_b 790 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 791 ret <8 x float> %res 792} 793 794define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 795; CHECK-LABEL: test_mask_and_ps_rmb_256: 796; CHECK: ## BB#0: 797; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07] 798; CHECK-NEXT: retq ## encoding: [0xc3] 799 %q = load float, float* %ptr_b 800 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 801 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 802 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 803 ret <8 x float> %res 804} 805 806define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, 
float* %ptr_b, <8 x float> %passThru, i8 %mask) { 807; CHECK-LABEL: test_mask_and_ps_rmbk_256: 808; CHECK: ## BB#0: 809; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 810; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f] 811; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 812; CHECK-NEXT: retq ## encoding: [0xc3] 813 %q = load float, float* %ptr_b 814 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 815 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 816 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 817 ret <8 x float> %res 818} 819 820define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 821; CHECK-LABEL: test_mask_and_ps_rmbkz_256: 822; CHECK: ## BB#0: 823; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 824; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07] 825; CHECK-NEXT: retq ## encoding: [0xc3] 826 %q = load float, float* %ptr_b 827 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 828 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 829 %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 830 ret <8 x float> %res 831} 832 833declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 834 835define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) { 836; CHECK-LABEL: test_mask_and_ps_rr_512: 837; CHECK: ## BB#0: 838; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1] 839; CHECK-NEXT: retq ## encoding: [0xc3] 840 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> 
zeroinitializer, i16 -1) 841 ret <16 x float> %res 842} 843 844define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 845; CHECK-LABEL: test_mask_and_ps_rrk_512: 846; CHECK: ## BB#0: 847; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 848; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1] 849; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 850; CHECK-NEXT: retq ## encoding: [0xc3] 851 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 852 ret <16 x float> %res 853} 854 855define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 856; CHECK-LABEL: test_mask_and_ps_rrkz_512: 857; CHECK: ## BB#0: 858; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 859; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1] 860; CHECK-NEXT: retq ## encoding: [0xc3] 861 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 862 ret <16 x float> %res 863} 864 865define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 866; CHECK-LABEL: test_mask_and_ps_rm_512: 867; CHECK: ## BB#0: 868; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07] 869; CHECK-NEXT: retq ## encoding: [0xc3] 870 %b = load <16 x float>, <16 x float>* %ptr_b 871 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 872 ret <16 x float> %res 873} 874 875define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 876; CHECK-LABEL: test_mask_and_ps_rmk_512: 877; CHECK: ## BB#0: 878; CHECK-NEXT: kmovw %esi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xce] 879; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f] 880; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 881; CHECK-NEXT: retq ## encoding: [0xc3] 882 %b = load <16 x float>, <16 x float>* %ptr_b 883 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 884 ret <16 x float> %res 885} 886 887define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 888; CHECK-LABEL: test_mask_and_ps_rmkz_512: 889; CHECK: ## BB#0: 890; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 891; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07] 892; CHECK-NEXT: retq ## encoding: [0xc3] 893 %b = load <16 x float>, <16 x float>* %ptr_b 894 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 895 ret <16 x float> %res 896} 897 898define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 899; CHECK-LABEL: test_mask_and_ps_rmb_512: 900; CHECK: ## BB#0: 901; CHECK-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07] 902; CHECK-NEXT: retq ## encoding: [0xc3] 903 %q = load float, float* %ptr_b 904 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 905 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 906 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 907 ret <16 x float> %res 908} 909 910define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 911; CHECK-LABEL: test_mask_and_ps_rmbk_512: 912; CHECK: ## BB#0: 913; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 914; CHECK-NEXT: vandps 
(%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f] 915; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 916; CHECK-NEXT: retq ## encoding: [0xc3] 917 %q = load float, float* %ptr_b 918 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 919 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 920 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 921 ret <16 x float> %res 922} 923 924define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 925; CHECK-LABEL: test_mask_and_ps_rmbkz_512: 926; CHECK: ## BB#0: 927; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 928; CHECK-NEXT: vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07] 929; CHECK-NEXT: retq ## encoding: [0xc3] 930 %q = load float, float* %ptr_b 931 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 932 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 933 %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 934 ret <16 x float> %res 935} 936 937declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 938 939define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) { 940; CHECK-LABEL: test_mask_or_ps_rr_128: 941; CHECK: ## BB#0: 942; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1] 943; CHECK-NEXT: retq ## encoding: [0xc3] 944 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 945 ret <4 x float> %res 946} 947 948define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { 949; CHECK-LABEL: 
test_mask_or_ps_rrk_128: 950; CHECK: ## BB#0: 951; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 952; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1] 953; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 954; CHECK-NEXT: retq ## encoding: [0xc3] 955 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 956 ret <4 x float> %res 957} 958 959define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 960; CHECK-LABEL: test_mask_or_ps_rrkz_128: 961; CHECK: ## BB#0: 962; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 963; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1] 964; CHECK-NEXT: retq ## encoding: [0xc3] 965 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 966 ret <4 x float> %res 967} 968 969define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 970; CHECK-LABEL: test_mask_or_ps_rm_128: 971; CHECK: ## BB#0: 972; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0x07] 973; CHECK-NEXT: retq ## encoding: [0xc3] 974 %b = load <4 x float>, <4 x float>* %ptr_b 975 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 976 ret <4 x float> %res 977} 978 979define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 980; CHECK-LABEL: test_mask_or_ps_rmk_128: 981; CHECK: ## BB#0: 982; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 983; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f] 984; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 985; CHECK-NEXT: retq ## encoding: [0xc3] 986 %b = load <4 x 
float>, <4 x float>* %ptr_b 987 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 988 ret <4 x float> %res 989} 990 991define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 992; CHECK-LABEL: test_mask_or_ps_rmkz_128: 993; CHECK: ## BB#0: 994; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 995; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07] 996; CHECK-NEXT: retq ## encoding: [0xc3] 997 %b = load <4 x float>, <4 x float>* %ptr_b 998 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 999 ret <4 x float> %res 1000} 1001 1002define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 1003; CHECK-LABEL: test_mask_or_ps_rmb_128: 1004; CHECK: ## BB#0: 1005; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07] 1006; CHECK-NEXT: retq ## encoding: [0xc3] 1007 %q = load float, float* %ptr_b 1008 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1009 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1010 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1011 ret <4 x float> %res 1012} 1013 1014define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 1015; CHECK-LABEL: test_mask_or_ps_rmbk_128: 1016; CHECK: ## BB#0: 1017; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1018; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f] 1019; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1020; CHECK-NEXT: retq ## encoding: [0xc3] 1021 %q = load float, float* %ptr_b 1022 %vecinit.i = insertelement <4 x float> undef, float %q, 
i32 0 1023 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1024 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1025 ret <4 x float> %res 1026} 1027 1028define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 1029; CHECK-LABEL: test_mask_or_ps_rmbkz_128: 1030; CHECK: ## BB#0: 1031; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1032; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07] 1033; CHECK-NEXT: retq ## encoding: [0xc3] 1034 %q = load float, float* %ptr_b 1035 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1036 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1037 %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1038 ret <4 x float> %res 1039} 1040 1041declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1042 1043define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) { 1044; CHECK-LABEL: test_mask_or_ps_rr_256: 1045; CHECK: ## BB#0: 1046; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0xc1] 1047; CHECK-NEXT: retq ## encoding: [0xc3] 1048 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1049 ret <8 x float> %res 1050} 1051 1052define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 1053; CHECK-LABEL: test_mask_or_ps_rrk_256: 1054; CHECK: ## BB#0: 1055; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1056; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1] 1057; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1058; 
CHECK-NEXT: retq ## encoding: [0xc3] 1059 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1060 ret <8 x float> %res 1061} 1062 1063define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 1064; CHECK-LABEL: test_mask_or_ps_rrkz_256: 1065; CHECK: ## BB#0: 1066; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1067; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1] 1068; CHECK-NEXT: retq ## encoding: [0xc3] 1069 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1070 ret <8 x float> %res 1071} 1072 1073define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 1074; CHECK-LABEL: test_mask_or_ps_rm_256: 1075; CHECK: ## BB#0: 1076; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0x07] 1077; CHECK-NEXT: retq ## encoding: [0xc3] 1078 %b = load <8 x float>, <8 x float>* %ptr_b 1079 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1080 ret <8 x float> %res 1081} 1082 1083define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 1084; CHECK-LABEL: test_mask_or_ps_rmk_256: 1085; CHECK: ## BB#0: 1086; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1087; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f] 1088; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1089; CHECK-NEXT: retq ## encoding: [0xc3] 1090 %b = load <8 x float>, <8 x float>* %ptr_b 1091 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1092 ret <8 x float> %res 1093} 1094 1095define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x 
float>* %ptr_b, i8 %mask) { 1096; CHECK-LABEL: test_mask_or_ps_rmkz_256: 1097; CHECK: ## BB#0: 1098; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1099; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07] 1100; CHECK-NEXT: retq ## encoding: [0xc3] 1101 %b = load <8 x float>, <8 x float>* %ptr_b 1102 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1103 ret <8 x float> %res 1104} 1105 1106define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 1107; CHECK-LABEL: test_mask_or_ps_rmb_256: 1108; CHECK: ## BB#0: 1109; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07] 1110; CHECK-NEXT: retq ## encoding: [0xc3] 1111 %q = load float, float* %ptr_b 1112 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1113 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1114 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1115 ret <8 x float> %res 1116} 1117 1118define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 1119; CHECK-LABEL: test_mask_or_ps_rmbk_256: 1120; CHECK: ## BB#0: 1121; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1122; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f] 1123; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1124; CHECK-NEXT: retq ## encoding: [0xc3] 1125 %q = load float, float* %ptr_b 1126 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1127 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1128 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1129 ret <8 x float> %res 
1130} 1131 1132define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 1133; CHECK-LABEL: test_mask_or_ps_rmbkz_256: 1134; CHECK: ## BB#0: 1135; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1136; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07] 1137; CHECK-NEXT: retq ## encoding: [0xc3] 1138 %q = load float, float* %ptr_b 1139 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1140 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1141 %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1142 ret <8 x float> %res 1143} 1144 1145declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1146 1147define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) { 1148; CHECK-LABEL: test_mask_or_ps_rr_512: 1149; CHECK: ## BB#0: 1150; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1] 1151; CHECK-NEXT: retq ## encoding: [0xc3] 1152 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1153 ret <16 x float> %res 1154} 1155 1156define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 1157; CHECK-LABEL: test_mask_or_ps_rrk_512: 1158; CHECK: ## BB#0: 1159; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1160; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1] 1161; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1162; CHECK-NEXT: retq ## encoding: [0xc3] 1163 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1164 ret <16 x float> %res 1165} 1166 1167define <16 x float> 
@test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 1168; CHECK-LABEL: test_mask_or_ps_rrkz_512: 1169; CHECK: ## BB#0: 1170; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1171; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1] 1172; CHECK-NEXT: retq ## encoding: [0xc3] 1173 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1174 ret <16 x float> %res 1175} 1176 1177define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 1178; CHECK-LABEL: test_mask_or_ps_rm_512: 1179; CHECK: ## BB#0: 1180; CHECK-NEXT: vorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07] 1181; CHECK-NEXT: retq ## encoding: [0xc3] 1182 %b = load <16 x float>, <16 x float>* %ptr_b 1183 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1184 ret <16 x float> %res 1185} 1186 1187define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 1188; CHECK-LABEL: test_mask_or_ps_rmk_512: 1189; CHECK: ## BB#0: 1190; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1191; CHECK-NEXT: vorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f] 1192; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1193; CHECK-NEXT: retq ## encoding: [0xc3] 1194 %b = load <16 x float>, <16 x float>* %ptr_b 1195 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1196 ret <16 x float> %res 1197} 1198 1199define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 1200; CHECK-LABEL: test_mask_or_ps_rmkz_512: 1201; CHECK: ## BB#0: 1202; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1203; CHECK-NEXT: vorps 
(%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07] 1204; CHECK-NEXT: retq ## encoding: [0xc3] 1205 %b = load <16 x float>, <16 x float>* %ptr_b 1206 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1207 ret <16 x float> %res 1208} 1209 1210define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 1211; CHECK-LABEL: test_mask_or_ps_rmb_512: 1212; CHECK: ## BB#0: 1213; CHECK-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07] 1214; CHECK-NEXT: retq ## encoding: [0xc3] 1215 %q = load float, float* %ptr_b 1216 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1217 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1218 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1219 ret <16 x float> %res 1220} 1221 1222define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 1223; CHECK-LABEL: test_mask_or_ps_rmbk_512: 1224; CHECK: ## BB#0: 1225; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1226; CHECK-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f] 1227; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1228; CHECK-NEXT: retq ## encoding: [0xc3] 1229 %q = load float, float* %ptr_b 1230 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1231 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1232 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1233 ret <16 x float> %res 1234} 1235 1236define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 1237; CHECK-LABEL: 
test_mask_or_ps_rmbkz_512: 1238; CHECK: ## BB#0: 1239; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1240; CHECK-NEXT: vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07] 1241; CHECK-NEXT: retq ## encoding: [0xc3] 1242 %q = load float, float* %ptr_b 1243 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1244 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1245 %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1246 ret <16 x float> %res 1247} 1248 1249declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1250 1251define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) { 1252; CHECK-LABEL: test_mask_xor_ps_rr_128: 1253; CHECK: ## BB#0: 1254; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1] 1255; CHECK-NEXT: retq ## encoding: [0xc3] 1256 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1257 ret <4 x float> %res 1258} 1259 1260define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) { 1261; CHECK-LABEL: test_mask_xor_ps_rrk_128: 1262; CHECK: ## BB#0: 1263; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1264; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] 1265; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1266; CHECK-NEXT: retq ## encoding: [0xc3] 1267 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1268 ret <4 x float> %res 1269} 1270 1271define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) { 1272; CHECK-LABEL: test_mask_xor_ps_rrkz_128: 1273; CHECK: ## 
BB#0: 1274; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1275; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1] 1276; CHECK-NEXT: retq ## encoding: [0xc3] 1277 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1278 ret <4 x float> %res 1279} 1280 1281define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { 1282; CHECK-LABEL: test_mask_xor_ps_rm_128: 1283; CHECK: ## BB#0: 1284; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0x07] 1285; CHECK-NEXT: retq ## encoding: [0xc3] 1286 %b = load <4 x float>, <4 x float>* %ptr_b 1287 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1288 ret <4 x float> %res 1289} 1290 1291define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) { 1292; CHECK-LABEL: test_mask_xor_ps_rmk_128: 1293; CHECK: ## BB#0: 1294; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1295; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] 1296; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1297; CHECK-NEXT: retq ## encoding: [0xc3] 1298 %b = load <4 x float>, <4 x float>* %ptr_b 1299 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1300 ret <4 x float> %res 1301} 1302 1303define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) { 1304; CHECK-LABEL: test_mask_xor_ps_rmkz_128: 1305; CHECK: ## BB#0: 1306; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1307; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07] 1308; CHECK-NEXT: retq ## encoding: [0xc3] 1309 %b = load <4 x float>, <4 x 
float>* %ptr_b 1310 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1311 ret <4 x float> %res 1312} 1313 1314define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) { 1315; CHECK-LABEL: test_mask_xor_ps_rmb_128: 1316; CHECK: ## BB#0: 1317; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07] 1318; CHECK-NEXT: retq ## encoding: [0xc3] 1319 %q = load float, float* %ptr_b 1320 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1321 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1322 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) 1323 ret <4 x float> %res 1324} 1325 1326define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) { 1327; CHECK-LABEL: test_mask_xor_ps_rmbk_128: 1328; CHECK: ## BB#0: 1329; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1330; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] 1331; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1332; CHECK-NEXT: retq ## encoding: [0xc3] 1333 %q = load float, float* %ptr_b 1334 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1335 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1336 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) 1337 ret <4 x float> %res 1338} 1339 1340define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) { 1341; CHECK-LABEL: test_mask_xor_ps_rmbkz_128: 1342; CHECK: ## BB#0: 1343; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1344; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: 
[0x62,0xf1,0x7c,0x99,0x57,0x07] 1345; CHECK-NEXT: retq ## encoding: [0xc3] 1346 %q = load float, float* %ptr_b 1347 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1348 %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer 1349 %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask) 1350 ret <4 x float> %res 1351} 1352 1353declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1354 1355define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) { 1356; CHECK-LABEL: test_mask_xor_ps_rr_256: 1357; CHECK: ## BB#0: 1358; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0xc1] 1359; CHECK-NEXT: retq ## encoding: [0xc3] 1360 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1361 ret <8 x float> %res 1362} 1363 1364define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) { 1365; CHECK-LABEL: test_mask_xor_ps_rrk_256: 1366; CHECK: ## BB#0: 1367; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1368; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] 1369; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1370; CHECK-NEXT: retq ## encoding: [0xc3] 1371 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1372 ret <8 x float> %res 1373} 1374 1375define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) { 1376; CHECK-LABEL: test_mask_xor_ps_rrkz_256: 1377; CHECK: ## BB#0: 1378; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1379; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1] 1380; CHECK-NEXT: retq ## encoding: 
[0xc3] 1381 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1382 ret <8 x float> %res 1383} 1384 1385define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { 1386; CHECK-LABEL: test_mask_xor_ps_rm_256: 1387; CHECK: ## BB#0: 1388; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0x07] 1389; CHECK-NEXT: retq ## encoding: [0xc3] 1390 %b = load <8 x float>, <8 x float>* %ptr_b 1391 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1392 ret <8 x float> %res 1393} 1394 1395define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) { 1396; CHECK-LABEL: test_mask_xor_ps_rmk_256: 1397; CHECK: ## BB#0: 1398; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1399; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f] 1400; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1401; CHECK-NEXT: retq ## encoding: [0xc3] 1402 %b = load <8 x float>, <8 x float>* %ptr_b 1403 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1404 ret <8 x float> %res 1405} 1406 1407define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) { 1408; CHECK-LABEL: test_mask_xor_ps_rmkz_256: 1409; CHECK: ## BB#0: 1410; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1411; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07] 1412; CHECK-NEXT: retq ## encoding: [0xc3] 1413 %b = load <8 x float>, <8 x float>* %ptr_b 1414 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1415 ret <8 x float> %res 1416} 1417 1418define <8 x float> 
@test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) { 1419; CHECK-LABEL: test_mask_xor_ps_rmb_256: 1420; CHECK: ## BB#0: 1421; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07] 1422; CHECK-NEXT: retq ## encoding: [0xc3] 1423 %q = load float, float* %ptr_b 1424 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1425 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1426 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) 1427 ret <8 x float> %res 1428} 1429 1430define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) { 1431; CHECK-LABEL: test_mask_xor_ps_rmbk_256: 1432; CHECK: ## BB#0: 1433; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1434; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] 1435; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1436; CHECK-NEXT: retq ## encoding: [0xc3] 1437 %q = load float, float* %ptr_b 1438 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1439 %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1440 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) 1441 ret <8 x float> %res 1442} 1443 1444define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) { 1445; CHECK-LABEL: test_mask_xor_ps_rmbkz_256: 1446; CHECK: ## BB#0: 1447; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 1448; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07] 1449; CHECK-NEXT: retq ## encoding: [0xc3] 1450 %q = load float, float* %ptr_b 1451 %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 1452 %b = shufflevector <8 x float> 
%vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer 1453 %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask) 1454 ret <8 x float> %res 1455} 1456 1457declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1458 1459define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) { 1460; CHECK-LABEL: test_mask_xor_ps_rr_512: 1461; CHECK: ## BB#0: 1462; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1] 1463; CHECK-NEXT: retq ## encoding: [0xc3] 1464 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1465 ret <16 x float> %res 1466} 1467 1468define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) { 1469; CHECK-LABEL: test_mask_xor_ps_rrk_512: 1470; CHECK: ## BB#0: 1471; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1472; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1] 1473; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1474; CHECK-NEXT: retq ## encoding: [0xc3] 1475 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1476 ret <16 x float> %res 1477} 1478 1479define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) { 1480; CHECK-LABEL: test_mask_xor_ps_rrkz_512: 1481; CHECK: ## BB#0: 1482; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1483; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1] 1484; CHECK-NEXT: retq ## encoding: [0xc3] 1485 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1486 ret <16 x float> %res 1487} 1488 
; -----------------------------------------------------------------------------
; This span holds two groups of tests.
;
; 1) Memory-operand forms of @llvm.x86.avx512.mask.xor.ps.512.
;    The second operand is either loaded as a whole <16 x float> vector
;    (the "rm" tests) or built from one scalar float load that is splatted
;    with insertelement + shufflevector (the "rmb" tests; the expected
;    assembly uses a {1to16} broadcast memory operand). Each form appears
;    three times: with an all-ones mask (i16 -1), with %mask plus a %passThru
;    vector ("k" suffix), and with %mask plus a zeroinitializer pass-through
;    ("kz" suffix, expected to emit the {z} form).
;
; 2) Masked floating-point to 64-bit integer conversions (cvtpd2qq, cvtpd2uqq,
;    cvtps2qq; 128- and 256-bit variants). Every function calls the intrinsic
;    twice on the same inputs, once with the %x2 mask and once with an
;    all-ones mask (i8 -1), and adds the two results, so the expected assembly
;    contains both the masked and the unmasked instruction followed by vpaddq.
;
; NOTE(review): the file header marks the assertion lines as autogenerated by
; utils/update_llc_test_checks.py; presumably they should be regenerated with
; that script rather than edited by hand -- confirm before touching them.
; -----------------------------------------------------------------------------
1489define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) { 1490; CHECK-LABEL: test_mask_xor_ps_rm_512: 1491; CHECK: ## BB#0: 1492; CHECK-NEXT: vxorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07] 1493; CHECK-NEXT: retq ## encoding: [0xc3] 1494 %b = load <16 x float>, <16 x float>* %ptr_b 1495 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1496 ret <16 x float> %res 1497} 1498 1499define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) { 1500; CHECK-LABEL: test_mask_xor_ps_rmk_512: 1501; CHECK: ## BB#0: 1502; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1503; CHECK-NEXT: vxorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f] 1504; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1505; CHECK-NEXT: retq ## encoding: [0xc3] 1506 %b = load <16 x float>, <16 x float>* %ptr_b 1507 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1508 ret <16 x float> %res 1509} 1510 1511define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) { 1512; CHECK-LABEL: test_mask_xor_ps_rmkz_512: 1513; CHECK: ## BB#0: 1514; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1515; CHECK-NEXT: vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07] 1516; CHECK-NEXT: retq ## encoding: [0xc3] 1517 %b = load <16 x float>, <16 x float>* %ptr_b 1518 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1519 ret <16 x float> %res 1520} 1521 1522define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) { 1523; CHECK-LABEL: test_mask_xor_ps_rmb_512: 1524; CHECK: ## BB#0: 1525; CHECK-NEXT: 
vxorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07] 1526; CHECK-NEXT: retq ## encoding: [0xc3] 1527 %q = load float, float* %ptr_b 1528 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1529 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1530 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1) 1531 ret <16 x float> %res 1532} 1533 1534define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) { 1535; CHECK-LABEL: test_mask_xor_ps_rmbk_512: 1536; CHECK: ## BB#0: 1537; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1538; CHECK-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f] 1539; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1540; CHECK-NEXT: retq ## encoding: [0xc3] 1541 %q = load float, float* %ptr_b 1542 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1543 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1544 %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) 1545 ret <16 x float> %res 1546} 1547 1548define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) { 1549; CHECK-LABEL: test_mask_xor_ps_rmbkz_512: 1550; CHECK: ## BB#0: 1551; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1552; CHECK-NEXT: vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07] 1553; CHECK-NEXT: retq ## encoding: [0xc3] 1554 %q = load float, float* %ptr_b 1555 %vecinit.i = insertelement <16 x float> undef, float %q, i32 0 1556 %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer 1557 %res = call <16 x float> 
@llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask) 1558 ret <16 x float> %res 1559} 1560 1561declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) 1562 1563declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8) 1564 1565define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { 1566; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128: 1567; CHECK: ## BB#0: 1568; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1569; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8] 1570; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0] 1571; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 1572; CHECK-NEXT: retq ## encoding: [0xc3] 1573 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) 1574 %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) 1575 %res2 = add <2 x i64> %res, %res1 1576 ret <2 x i64> %res2 1577} 1578 1579declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8) 1580 1581define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { 1582; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256: 1583; CHECK: ## BB#0: 1584; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1585; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8] 1586; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0] 1587; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 1588; CHECK-NEXT: retq ## encoding: [0xc3] 1589 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) 1590 %res1 = call <4 x i64> 
@llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1) 1591 %res2 = add <4 x i64> %res, %res1 1592 ret <4 x i64> %res2 1593} 1594 1595declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8) 1596 1597define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) { 1598; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128: 1599; CHECK: ## BB#0: 1600; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1601; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8] 1602; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0] 1603; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 1604; CHECK-NEXT: retq ## encoding: [0xc3] 1605 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) 1606 %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) 1607 %res2 = add <2 x i64> %res, %res1 1608 ret <2 x i64> %res2 1609} 1610 1611declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8) 1612 1613define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) { 1614; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256: 1615; CHECK: ## BB#0: 1616; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1617; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8] 1618; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0] 1619; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 1620; CHECK-NEXT: retq ## encoding: [0xc3] 1621 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) 1622 %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1) 1623 %res2 = add <4 x i64> %res, %res1 
1624 ret <4 x i64> %res2 1625} 1626 1627declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8) 1628 1629define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) { 1630; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128: 1631; CHECK: ## BB#0: 1632; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1633; CHECK-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8] 1634; CHECK-NEXT: vcvtps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] 1635; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 1636; CHECK-NEXT: retq ## encoding: [0xc3] 1637 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) 1638 %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) 1639 %res2 = add <2 x i64> %res, %res1 1640 ret <2 x i64> %res2 1641} 1642 1643declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8) 1644 1645define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { 1646; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256: 1647; CHECK: ## BB#0: 1648; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1649; CHECK-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8] 1650; CHECK-NEXT: vcvtps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0] 1651; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 1652; CHECK-NEXT: retq ## encoding: [0xc3] 1653 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) 1654 %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) 1655 %res2 = add <4 x i64> %res, %res1 1656 ret <4 x i64> %res2 1657} 1658 1659declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8) 1660 
1661define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) { 1662; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128: 1663; CHECK: ## BB#0: 1664; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1665; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8] 1666; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] 1667; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 1668; CHECK-NEXT: retq ## encoding: [0xc3] 1669 %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) 1670 %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) 1671 %res2 = add <2 x i64> %res, %res1 1672 ret <2 x i64> %res2 1673} 1674 1675declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8) 1676 1677define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { 1678; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256: 1679; CHECK: ## BB#0: 1680; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] 1681; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8] 1682; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0] 1683; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 1684; CHECK-NEXT: retq ## encoding: [0xc3] 1685 %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) 1686 %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) 1687 %res2 = add <4 x i64> %res, %res1 1688 ret <4 x i64> %res2 1689} 1690 1691declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8) 1692 1693define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) { 1694; 
CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)

; cvtqq2pd.256: one call with mask %x2 and one with mask i8 -1 (same %x0/%x1);
; the two results are fadd'ed and returned.
define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)

; cvtqq2ps.128: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)

; cvtqq2ps.256: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)

; cvttpd2qq.128: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)

; cvttpd2qq.256: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)

; cvttpd2uqq.128: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)

; cvttpd2uqq.256: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)

; cvttps2qq.128: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvttps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)

; cvttps2qq.256: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)

; cvtuqq2pd.128: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)

; cvtuqq2pd.256: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)

; cvtuqq2ps.128: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)

; cvtuqq2ps.256: one call with mask %x2 and one with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)

; cvttps2uqq.128: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)

; cvttps2uqq.256: one call with mask %x2 and one with mask i8 -1;
; the two results are added and returned.
define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)

; reduce.pd.128: immediate 4 with mask %x3 and immediate 8 with mask i8 -1;
; the two results are fadd'ed and returned.
define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; CHECK-NEXT: vreducepd $8, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)

; reduce.pd.256: immediate 4 with mask %x3 and immediate 0 with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; CHECK-NEXT: vreducepd $0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)

; reduce.ps.128: immediate 4 with mask %x3 and immediate 88 with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; CHECK-NEXT: vreduceps $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)

; reduce.ps.256: immediate 11 in both calls, one with mask %x3 and one with
; mask i8 -1; the two results are fadd'ed and returned.
define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; CHECK-NEXT: vreduceps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

; range.pd.128: immediate 4 with mask %x4 and immediate 8 with mask i8 -1;
; the two results are fadd'ed and returned.
define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; CHECK-NEXT: vrangepd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

; range.pd.256: immediate 4 with mask %x4 and immediate 88 with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; CHECK-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

; range.ps.128: immediate 4 with mask %x4 and immediate 88 with mask i8 -1;
; the two results are fadd'ed and returned.
define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; CHECK-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

; range.ps.256: immediate 4 with mask %x4 and immediate 88 with mask i8 -1;
; the two results are fadd'ed and returned.
define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; CHECK-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)

; vextractf64x2.256 with immediate 1, three calls: (%x2, mask %x3),
; (zeroinitializer, mask %x3) and (zeroinitializer, mask i8 -1); the three
; results are fadd'ed and returned.
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc2,0x01]
; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc0,0x01]
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)

; insertf64x2.256 with immediate 1, three calls: (%x3, mask %x4),
; (%x3, mask i8 -1) and (zeroinitializer, mask %x4); the three results are
; fadd'ed and returned.
define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xd9,0x01]
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xc1,0x01]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)

; inserti64x2.256 with immediate 1, three calls: (%x3, mask %x4),
; (%x3, mask i8 -1) and (zeroinitializer, mask %x4); the three results are
; added and returned.
define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xd9,0x01]
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xc1,0x01]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)

; fpclass.ps.128: immediate 2 with mask %x1 and immediate 4 with mask i8 -1;
; the two i8 results are added and returned.
define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
; CHECK-NEXT: vfpclassps $4, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)

; fpclass.ps.256: immediate 2 with mask %x1 and immediate 4 with mask i8 -1;
; the two i8 results are added and returned.
define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
; CHECK-NEXT: vfpclassps $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)

; fpclass.pd.128: immediate 4 with mask %x1 and immediate 2 with mask i8 -1;
; the two i8 results are added and returned.
define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc0,0x02]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)

; fpclass.pd.256: immediate 2 with mask %x1 and immediate 4 with mask i8 -1;
; the two i8 results are added and returned.
define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)

; broadcastf32x2.256, three calls: (%x2, mask %x3), (zeroinitializer, mask %x3)
; and (%x2, mask i8 -1); the three results are fadd'ed and returned.
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x19,0xc8]
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x19,0xc0]
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)

; broadcasti32x2.256, three calls summed with add. The first call's source is
; an i64 loaded from %y_ptr, inserted into lane 0 of a <2 x i64> and bitcast to
; <4 x i32>; the expected assembly for it uses the memory operand (%rsi).
; The other two calls use %x0 with (zeroinitializer, mask %x3) and
; (%x2, mask i8 -1).
define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0]
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %y_64 = load i64, i64 * %y_ptr
  %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
  %y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %y, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)

; broadcasti32x2.128, three calls: (%x2, mask %x3), (zeroinitializer, mask %x3)
; and (%x2, mask i8 -1); the three results are added and returned.
define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0]
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x59,0xc0]
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)

; Returns the i8 result of llvm.x86.avx512.cvtd2mask.128 applied to %x0.
define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)

; Returns the i8 result of llvm.x86.avx512.cvtd2mask.256 applied to %x0.
define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)

; Returns the i8 result of llvm.x86.avx512.cvtq2mask.128 applied to %x0.
define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)

; Returns the i8 result of llvm.x86.avx512.cvtq2mask.256 applied to %x0.
define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
  ret i8 %res
}

declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)

; Returns the <4 x i32> result of llvm.x86.avx512.cvtmask2d.128 applied to %x0.
define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
; CHECK-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)

; Returns the <8 x i32> result of llvm.x86.avx512.cvtmask2d.256 applied to %x0.
define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
; CHECK-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)

; Returns the <2 x i64> result of llvm.x86.avx512.cvtmask2q.128 applied to %x0.
define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
; CHECK-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)

; Returns the <4 x i64> result of llvm.x86.avx512.cvtmask2q.256 applied to %x0.
define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
; CHECK-NEXT: vpmovm2q %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
  ret <4 x i64> %res
}
declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)

; broadcastf64x2.256, three calls: (%x2, mask i8 -1), (%x2, mask %mask) and
; (zeroinitializer, mask %mask); the three results are fadd'ed and returned.
; The expected assembly for all three is vshuff64x2 $0 of ymm0 with itself.
define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00]
; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]

  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
  %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res1, %res2
  %res5 = fadd <4 x double> %res3, %res4
  ret <4 x double> %res5
}

declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
; Calls llvm.x86.avx512.mask.broadcasti64x2.256 three times on the same source
; vector: with an all-ones mask (i8 -1), with %mask and %x2 as pass-through,
; and with %mask and a zeroinitializer pass-through. The expected output shows
; the unmasked, {%k1} and {%k1} {z} forms of vshufi64x2 with immediate 0.
define <4 x i64> @test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00]
; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1]
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]

  %unmasked = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
  %merged = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
  %zeroed = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  ; Fold the three results into the return value (presumably so every call
  ; stays live through codegen).
  %sum.partial = add <4 x i64> %unmasked, %merged
  %sum.all = add <4 x i64> %zeroed, %sum.partial
  ret <4 x i64> %sum.all
}