; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

; These test cases demonstrate cases where vpternlog could benefit from being commuted.
;
; Naming key, as used by the functions below:
;   _012 / _102 / _210 / _021  order in which %x0, %x1, %x2 are passed as the
;                              three vector operands of the intrinsic.
;   _loadN                     %xN is loaded from the pointer argument.
;   _broadcastN                %xN is a scalar i32 load splatted to all lanes
;                              (insertelement + zero-mask shufflevector).
;   _mask                      the result is blended (select on %mask) with the
;                              value passed as the FIRST intrinsic operand.
;   _mask1 / _mask2            same, but the select falls back to %x1 / %x2.
;   _maskz                     the select falls back to zeroinitializer.
; Every call uses immediate 114 except the _load0_mask1, _load0_mask2,
; _load1_mask2 and _load2_mask1 tests, which use 33.
; The expected immediates differ from the IR immediates whenever the operands
; end up in a different dest/src2/src3 position than in the intrinsic call.

declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)

; --- Unmasked, all operands in registers -------------------------------------

define <16 x i32> @vpternlog_v16i32_012(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

; --- Unmasked, one operand loaded from memory --------------------------------

define <16 x i32> @vpternlog_v16i32_012_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_012_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_012_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_012_load2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_102_load2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_210_load2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_021_load0(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_021_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_021_load1(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_021_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_021_load2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr) {
; CHECK-LABEL: vpternlog_v16i32_021_load2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  ret <16 x i32> %1
}

; --- Merge-masked, all operands in registers ---------------------------------
; The select's false operand (the passthru) has to end up in the destination
; register, which constrains how the other operands may be commuted.

define <16 x i32> @vpternlog_v16i32_012_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_012_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask1:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %res2
}

define <16 x i32> @vpternlog_v16i32_012_mask2(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_mask2:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $58, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %res2
}

; --- Merge-masked, one operand loaded from memory ----------------------------
; When the loaded value is also the passthru, the expected output keeps a
; separate vmovdqa64 load instead of folding the memory operand.

define <16 x i32> @vpternlog_v16i32_012_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

; FIXME(review): the $65 expected below looks like the wrong truth table.
; The operands are placed exactly as in vpternlog_v16i32_012_load0 (first
; intrinsic operand folded from memory, second in the destination register,
; third in the middle source), and that test remaps immediate 114 to 46.
; Applying the same bit permutation to 33 gives 9, not 65; 65 is what the
; opposite rotation (the one that turns 114 into 92) produces. This may be
; recording a miscompile of the masked load-folding path -- please confirm
; against the backend and regenerate rather than editing the value by hand.
define <16 x i32> @vpternlog_v16i32_012_load0_mask1(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask1:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $65, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %res2
}

define <16 x i32> @vpternlog_v16i32_012_load0_mask2(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_mask2:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $33, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %res2
}

define <16 x i32> @vpternlog_v16i32_012_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

; FIXME(review): the $9 expected below looks like the wrong truth table.
; The operands are placed with the same rotation as vpternlog_v16i32_210
; (first intrinsic operand in the middle source, second in the last source --
; here folded from memory -- and third in the destination register), and that
; test remaps immediate 114 to 92. Applying the same bit permutation to 33
; gives 65, not 9; 9 is what the opposite rotation (the one that turns 114
; into 46) produces. This may be recording a miscompile of the masked
; load-folding path -- please confirm against the backend and regenerate
; rather than editing the value by hand.
define <16 x i32> @vpternlog_v16i32_012_load1_mask2(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_mask2:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %res2
}

define <16 x i32> @vpternlog_v16i32_012_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_012_load2_mask1(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_mask1:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $9, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %res2
}

define <16 x i32> @vpternlog_v16i32_102_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load0_mask(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load1_mask(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load2_mask(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  ret <16 x i32> %3
}

; --- Zero-masked -------------------------------------------------------------
; With a zeroinitializer fallback there is no passthru operand; the expected
; immediates and operand orders below match the corresponding unmasked tests.

define <16 x i32> @vpternlog_v16i32_012_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $114, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $78, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_012_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load0_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_012_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load1_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_012_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_load2_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load0_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load1_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_102_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_load2_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load0_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load1_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_210_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_load2_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load0_maskz(<16 x i32>* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load0_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x0 = load <16 x i32>, <16 x i32>* %x0ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load1_maskz(<16 x i32> %x0, <16 x i32>* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load1_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $114, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x1 = load <16 x i32>, <16 x i32>* %x1ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_load2_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpternlogd $116, (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %x2 = load <16 x i32>, <16 x i32>* %x2ptr
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; --- Unmasked, one operand broadcast from a scalar load ----------------------
; The expected output folds the splat as an embedded {1to16} memory operand.

define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0_scalar = load i32, i32* %ptr_x0
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_012_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1_scalar = load i32, i32* %ptr_x1
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_012_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2_scalar = load i32, i32* %ptr_x2
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0_scalar = load i32, i32* %ptr_x0
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1_scalar = load i32, i32* %ptr_x1
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_102_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x2_scalar = load i32, i32* %ptr_x2
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x0_scalar = load i32, i32* %ptr_x0
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0
; CHECK-NEXT: retq
  %x1_scalar = load i32, i32* %ptr_x1
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  ret <16 x i32> %1
}

define <16 x i32> @vpternlog_v16i32_210_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpternlogd
$58, (%rdi){1to16}, %zmm1, %zmm0 733; CHECK-NEXT: retq 734 %x2_scalar = load i32, i32* %ptr_x2 735 %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0 736 %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 737 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114) 738 ret <16 x i32> %1 739} 740 741define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) { 742; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask: 743; CHECK: ## %bb.0: 744; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2 745; CHECK-NEXT: kmovd %esi, %k1 746; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1} 747; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 748; CHECK-NEXT: retq 749 %x0scalar = load i32, i32* %x0ptr 750 %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0 751 %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 752 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114) 753 %2 = bitcast i16 %mask to <16 x i1> 754 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 755 ret <16 x i32> %3 756} 757 758define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) { 759; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask: 760; CHECK: ## %bb.0: 761; CHECK-NEXT: kmovd %esi, %k1 762; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} 763; CHECK-NEXT: retq 764 %x1scalar = load i32, i32* %x1ptr 765 %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0 766 %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 767 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114) 768 %2 = bitcast i16 %mask to <16 x i1> 769 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 770 ret <16 x i32> %3 
771} 772 773define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) { 774; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask: 775; CHECK: ## %bb.0: 776; CHECK-NEXT: kmovd %esi, %k1 777; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} 778; CHECK-NEXT: retq 779 %x2scalar = load i32, i32* %x2ptr 780 %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0 781 %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 782 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114) 783 %2 = bitcast i16 %mask to <16 x i1> 784 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 785 ret <16 x i32> %3 786} 787 788define <16 x i32> @vpternlog_v16i32_102_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) { 789; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_mask: 790; CHECK: ## %bb.0: 791; CHECK-NEXT: kmovd %esi, %k1 792; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} 793; CHECK-NEXT: retq 794 %x0scalar = load i32, i32* %x0ptr 795 %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0 796 %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 797 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114) 798 %2 = bitcast i16 %mask to <16 x i1> 799 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 800 ret <16 x i32> %3 801} 802 803define <16 x i32> @vpternlog_v16i32_102_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) { 804; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_mask: 805; CHECK: ## %bb.0: 806; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2 807; CHECK-NEXT: kmovd %esi, %k1 808; CHECK-NEXT: vpternlogd $114, %zmm1, %zmm0, %zmm2 {%k1} 809; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 810; CHECK-NEXT: retq 811 %x1scalar = load i32, i32* %x1ptr 812 %vecinit.i = insertelement 
<16 x i32> undef, i32 %x1scalar, i32 0 813 %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 814 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114) 815 %2 = bitcast i16 %mask to <16 x i1> 816 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 817 ret <16 x i32> %3 818} 819 820define <16 x i32> @vpternlog_v16i32_102_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) { 821; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_mask: 822; CHECK: ## %bb.0: 823; CHECK-NEXT: kmovd %esi, %k1 824; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1} 825; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 826; CHECK-NEXT: retq 827 %x2scalar = load i32, i32* %x2ptr 828 %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0 829 %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 830 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114) 831 %2 = bitcast i16 %mask to <16 x i1> 832 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 833 ret <16 x i32> %3 834} 835 836define <16 x i32> @vpternlog_v16i32_210_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) { 837; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_mask: 838; CHECK: ## %bb.0: 839; CHECK-NEXT: kmovd %esi, %k1 840; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm0, %zmm1 {%k1} 841; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 842; CHECK-NEXT: retq 843 %x0scalar = load i32, i32* %x0ptr 844 %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0 845 %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 846 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114) 847 %2 = bitcast i16 %mask to <16 x i1> 848 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 849 ret <16 x i32> %3 850} 851 852define <16 x 
i32> @vpternlog_v16i32_210_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) { 853; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_mask: 854; CHECK: ## %bb.0: 855; CHECK-NEXT: kmovd %esi, %k1 856; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm0, %zmm1 {%k1} 857; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 858; CHECK-NEXT: retq 859 %x1scalar = load i32, i32* %x1ptr 860 %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0 861 %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 862 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114) 863 %2 = bitcast i16 %mask to <16 x i1> 864 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 865 ret <16 x i32> %3 866} 867 868define <16 x i32> @vpternlog_v16i32_210_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) { 869; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_mask: 870; CHECK: ## %bb.0: 871; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2 872; CHECK-NEXT: kmovd %esi, %k1 873; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1} 874; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 875; CHECK-NEXT: retq 876 %x2scalar = load i32, i32* %x2ptr 877 %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0 878 %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 879 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114) 880 %2 = bitcast i16 %mask to <16 x i1> 881 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 882 ret <16 x i32> %3 883} 884 885define <16 x i32> @vpternlog_v16i32_021_broadcast0_mask(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) { 886; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_mask: 887; CHECK: ## %bb.0: 888; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2 889; CHECK-NEXT: kmovd %esi, %k1 890; CHECK-NEXT: vpternlogd $114, %zmm0, %zmm1, %zmm2 {%k1} 891; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 
892; CHECK-NEXT: retq 893 %x0scalar = load i32, i32* %x0ptr 894 %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0 895 %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 896 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114) 897 %2 = bitcast i16 %mask to <16 x i1> 898 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 899 ret <16 x i32> %3 900} 901 902define <16 x i32> @vpternlog_v16i32_021_broadcast1_mask(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) { 903; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_mask: 904; CHECK: ## %bb.0: 905; CHECK-NEXT: kmovd %esi, %k1 906; CHECK-NEXT: vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} 907; CHECK-NEXT: retq 908 %x1scalar = load i32, i32* %x1ptr 909 %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0 910 %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 911 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114) 912 %2 = bitcast i16 %mask to <16 x i1> 913 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 914 ret <16 x i32> %3 915} 916 917define <16 x i32> @vpternlog_v16i32_021_broadcast2_mask(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) { 918; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_mask: 919; CHECK: ## %bb.0: 920; CHECK-NEXT: kmovd %esi, %k1 921; CHECK-NEXT: vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} 922; CHECK-NEXT: retq 923 %x2scalar = load i32, i32* %x2ptr 924 %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0 925 %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 926 %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114) 927 %2 = bitcast i16 %mask to <16 x i1> 928 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 929 ret <16 x i32> %3 930} 931 
; Zero-masked tests: one operand is a scalar load splatted to all 16 lanes and
; lanes whose %mask bit is clear are zeroed. In every expected instruction the
; splat is the {1to16} memory operand, so the immediate is the truth table 114
; re-indexed for the resulting operand order.
; Operand order (x0, x1, x2); x0 is the splat.
define <16 x i32> @vpternlog_v16i32_012_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x0, x1, x2); x1 is the splat.
define <16 x i32> @vpternlog_v16i32_012_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x1scalar = load i32, i32* %x1ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x0, x1, x2); x2 is the splat.
define <16 x i32> @vpternlog_v16i32_012_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2scalar = load i32, i32* %x2ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x1, x0, x2); x0 is the splat.
define <16 x i32> @vpternlog_v16i32_102_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x1, x0, x2); x1 is the splat.
define <16 x i32> @vpternlog_v16i32_102_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x1scalar = load i32, i32* %x1ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x1, x0, x2); x2 is the splat.
define <16 x i32> @vpternlog_v16i32_102_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_102_broadcast2_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2scalar = load i32, i32* %x2ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x2, x1, x0); x0 is the splat.
define <16 x i32> @vpternlog_v16i32_210_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast0_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x2, x1, x0); x1 is the splat.
define <16 x i32> @vpternlog_v16i32_210_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $92, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x1scalar = load i32, i32* %x1ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x2, x1, x0); x2 is the splat.
define <16 x i32> @vpternlog_v16i32_210_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2scalar = load i32, i32* %x2ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x0, x2, x1); x0 is the splat.
define <16 x i32> @vpternlog_v16i32_021_broadcast0_maskz(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x0, x2, x1); x1 is the splat.
define <16 x i32> @vpternlog_v16i32_021_broadcast1_maskz(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast1_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $114, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x1scalar = load i32, i32* %x1ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Operand order (x0, x2, x1); x2 is the splat.
define <16 x i32> @vpternlog_v16i32_021_broadcast2_maskz(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $116, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2scalar = load i32, i32* %x2ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}

; Merge-masked tests whose pass-through is NOT the intrinsic's first operand.
; Operand order (x0, x1, x2); x0 is the splat; pass-through is x1.
;
; NOTE: the immediate below was corrected by hand; the recorded value was $92.
; The operands are dest=%zmm0=%x1, src2=%zmm1=%x2, mem=%x0, exactly as in
; vpternlog_v16i32_012_broadcast0_maskz, which computes the same call with $46.
; Merge- versus zero-masking does not change the truth table, so $46 is needed
; here too. $92 selects a different function: for (x0,x1,x2) = (1,0,0) table
; 114 yields 1, while $92 with these operands yields 0. If llc still prints
; $92, the masked isel pattern is applying the wrong immediate transform. Fix
; the backend; do not regenerate this check over it.
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask1(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $46, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %res2
}

; Operand order (x0, x1, x2); x0 is the splat; pass-through is x2.
define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask2(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $58, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x0scalar = load i32, i32* %x0ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
  %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %res2
}

; Operand order (x0, x1, x2); x1 is the splat; pass-through is x2.
;
; NOTE: the immediate below was corrected by hand; the recorded value was $46.
; The operands are dest=%zmm1=%x2, src2=%zmm0=%x0, mem=%x1, so the intrinsic's
; (op0, op1, op2) map to the instruction's (src2, mem, dest). That is the same
; re-indexing as vpternlog_v16i32_210_broadcast1_maskz, which turns 114 into
; $92. $46 selects a different function: for (x0,x1,x2) = (0,0,1) table 114
; yields 1, while $46 with these operands yields 0. If llc still prints $46,
; the masked isel pattern is applying the wrong immediate transform. Fix the
; backend; do not regenerate this check over it.
define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask2(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $92, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x1scalar = load i32, i32* %x1ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
  %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %res2
}

; Operand order (x0, x1, x2); x2 is the splat; pass-through is x1.
define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask1(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpternlogd $78, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x2scalar = load i32, i32* %x2ptr
  %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
  %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %1, <16 x i32> %x1
  ret <16 x i32> %res2
}