; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX

; Vector compare tests. Each function below is compiled twice by the two llc
; invocations above; check lines using the KNL prefix are matched against the
; -mcpu=knl output and check lines using the SKX prefix against the -mcpu=skx
; output. Most functions compute a vector compare and feed the resulting i1
; vector into a select, a bitcast, an extension, or another mask operation.

; fcmp ole on <16 x float> feeding a select between the two compared values.
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpleps %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}

; Same pattern as test1, on <8 x double>.
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmplepd %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}

; icmp eq against a vector loaded from memory; the expected compare takes the
; load as its memory operand.
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Unsigned icmp uge on <16 x i32>; the select picks between %x1 and %y.
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test4_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; icmp eq on <8 x i64> feeding a select between the two compared values.
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

; Unsigned icmp ugt on <8 x i64>; the select picks between %x1 and %y.
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; KNL-LABEL: test6_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
  ret <8 x i64> %max
}

; 128-bit fcmp olt against zero. The two runs expect different sequences: a
; vector-register compare plus blend for knl, a %k1-masked move for skx.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX:    vxorps %xmm2, %xmm2, %xmm2
; SKX:    vcmpltps %xmm2, %xmm0, %k1
; SKX:    vmovaps %xmm0, %xmm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq

  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1> %mask, <4 x float> %a, <4 x float> %b
  ret <4 x float> %c
}

; Same pattern as test7, on <2 x double>.
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX:    vxorpd %xmm2, %xmm2, %xmm2
; SKX:    vcmpltpd %xmm2, %xmm0, %k1
; SKX:    vmovapd %xmm0, %xmm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1> %mask, <2 x double> %a, <2 x double> %b
  ret <2 x double> %c
}

; 256-bit icmp eq; the knl checks expect the compare on %zmm registers.
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; 256-bit fcmp oeq; knl checks use %zmm operands, skx checks use %ymm operands.
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX:    vcmpeqps %ymm1, %ymm0, %k1
; SKX:    vmovaps %ymm0, %ymm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq

  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}

; icmp ugt + select of the same operands; expected to become a single vpmaxud.
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test11_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; <16 x i64> compare bitcast to i16: two 8-element compares whose masks are
; combined with kunpckbw.
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT:    kunpckbw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    retq
  %res = icmp eq <16 x i64> %a, %b
  %res1 = bitcast <16 x i1> %res to i16
  ret i16 %res1
}

; <32 x i32> compare bitcast to i32; masks combined with kunpckwd (skx only).
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-LABEL: test12_v32i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckwd %k0, %k1, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    retq
  %res = icmp eq <32 x i32> %a, %b
  %res1 = bitcast <32 x i1> %res to i32
  ret i32 %res1
}

; <64 x i16> compare bitcast to i64; masks combined with kunpckdq (skx only).
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-LABEL: test12_v64i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckdq %k0, %k1, %k0
; SKX-NEXT:    kmovq %k0, %rax
; SKX-NEXT:    retq
  %res = icmp eq <64 x i16> %a, %b
  %res1 = bitcast <64 x i1> %res to i64
  ret i64 %res1
}

; zext of a compare result; expected as a zero-masked broadcast of a constant.
define <16 x i32> @test13(<16 x float> %a, <16 x float> %b) {
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %cmpvector_i = fcmp oeq <16 x float> %a, %b
  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
  ret <16 x i32> %conv
}

; The select condition is "sext(sgt compare) == 0", i.e. the inverted compare;
; lanes where it holds become zero, the others keep %sub_r.
define <16 x i32> @test14(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    knotw %k0, %k1
; KNL-NEXT:    vmovdqu32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %sub_r = sub <16 x i32> %a, %b
  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
  ret <16 x i32> %res
}

; Same pattern as test14, on <8 x i64>.
define <8 x i64> @test15(<8 x i64> %a, <8 x i64> %b) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    knotw %k0, %k1
; KNL-NEXT:    vmovdqu64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %sub_r = sub <8 x i64> %a, %b
  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
  ret <8 x i64> %res
}

; icmp sge x, y; the expected instruction is vpcmpled with swapped operands.
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm0, %zmm1, %k1
; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; icmp sgt against a loaded vector, load used as the compare's memory operand.
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; As test17, with icmp sle.
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test18:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; As test17, with unsigned icmp ule.
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test19:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Two compares combined through "select %mask0, %mask1, zeroinitializer";
; the second expected compare is itself masked by %k1.
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test20:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

; As test20, on <8 x i64> with sge / sle compares.
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test21:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; Combined masks where one compare operand is loaded from memory.
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test22:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
; KNL-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
  %mask0 = icmp sgt <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; As test22, on <16 x i32> with sge and unsigned ule compares.
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test23:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask0 = icmp ule <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Compare against a splat of a loaded scalar; the expected compare uses a
; {1to8} broadcast memory operand.
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; KNL-LABEL: test24:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; As test24, on <16 x i32> with icmp sle and a {1to16} broadcast operand.
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; KNL-LABEL: test25:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Combined masks where the second compare uses a broadcast memory operand.
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test26:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask0 = icmp sgt <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; As test26, on <8 x i64> with sge / sle compares.
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test27:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; icmp eq between two i1 mask vectors; expected to use kxnorw.
; KNL-LABEL: test28:
; KNL: vpcmpgtq
; KNL: vpcmpgtq
; KNL: kxnorw
define <8 x i32> @test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
  %x_gt_y = icmp sgt <8 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
  %res = icmp eq <8 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <8 x i1> %res to <8 x i32>
  ret <8 x i32> %resse
}

; icmp ne between two i1 mask vectors; expected to use kxorw.
; KNL-LABEL: test29:
; KNL: vpcmpgtd
; KNL: vpcmpgtd
; KNL: kxorw
define <16 x i8> @test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
  %x_gt_y = icmp sgt <16 x i32> %x, %y
  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
  %res = icmp ne <16 x i1> %x_gt_y, %x1_gt_y1
  %resse = sext <16 x i1> %res to <16 x i8>
  ret <16 x i8> %resse
}

; 256-bit fcmp oeq + select, checked for the skx run only.
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-LABEL: test30:
; SKX: vcmpeqpd %ymm1, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}

  %mask = fcmp oeq <4 x double> %x, %y
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

; tests 31-36: fcmp against a loaded vector at 128/256/512 bits; the load is
; expected as the compare's memory operand.
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; SKX-LABEL: test31:
; SKX: vcmpltpd (%rdi), %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}

  %y = load <2 x double>, <2 x double>* %yp, align 4
  %mask = fcmp olt <2 x double> %x, %y
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}

; Written as "ogt %y, %x" (loaded value first); still expected as vcmpltpd
; with the load as memory operand.
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; SKX-LABEL: test32:
; SKX: vcmpltpd (%rdi), %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}

  %y = load <4 x double>, <4 x double>* %yp, align 4
  %mask = fcmp ogt <4 x double> %y, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; SKX-LABEL: test33:
; SKX: vcmpltpd (%rdi), %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}
  %y = load <8 x double>, <8 x double>* %yp, align 4
  %mask = fcmp olt <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; SKX-LABEL: test34:
; SKX: vcmpltps (%rdi), %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}
  %y = load <4 x float>, <4 x float>* %yp, align 4
  %mask = fcmp olt <4 x float> %x, %y
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}

; Written as "ogt %y, %x" (loaded value first); still expected as vcmpltps
; with the load as memory operand.
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; SKX-LABEL: test35:
; SKX: vcmpltps (%rdi), %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}

  %y = load <8 x float>, <8 x float>* %yp, align 4
  %mask = fcmp ogt <8 x float> %y, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}

define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; SKX-LABEL: test36:
; SKX: vcmpltps (%rdi), %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}
  %y = load <16 x float>, <16 x float>* %yp, align 4
  %mask = fcmp olt <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}

; tests 37-42: "fcmp ogt splat(load), %x" at each vector width; expected as
; vcmplt with a {1toN} broadcast memory operand.
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test37:
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; SKX: vmovapd %zmm0, %zmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask = fcmp ogt <8 x double> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test38:
; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX: vmovapd %ymm0, %ymm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer

  %mask = fcmp ogt <4 x double> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test39:
; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX: vmovapd %xmm0, %xmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>

  %mask = fcmp ogt <2 x double> %shuffle, %x
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}


define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test40:
; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
; SKX: vmovaps %zmm0, %zmm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <16 x float> undef, float %a, i32 0
  %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <16 x float> %shuffle, %x
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}

define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test41:
; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX: vmovaps %ymm0, %ymm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <8 x float> undef, float %a, i32 0
  %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <8 x float> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}

define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test42:
; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX: vmovaps %xmm0, %xmm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <4 x float> undef, float %a, i32 0
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <4 x float> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}

; Broadcast compare and-ed with an incoming <8 x i1> argument; the argument is
; expected to be moved into %k1 first and then used to mask the compare.
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr, <8 x i1> %mask_in) nounwind {
; SKX-LABEL: test43:
; SKX: vpmovw2m %xmm2, %k1
; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX: vmovapd %zmm0, %zmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
  %mask = and <8 x i1> %mask_cmp, %mask_in
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}