; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
;
; Each function compares two integer vectors for equality, widens the
; resulting <N x i1> vector with lanes taken from a zeroinitializer operand,
; and bitcasts the widened vector to a scalar integer mask. Every shape is
; exercised in four variants: register operand, loaded operand (_mem), and
; both of those ANDed with a caller-supplied mask %__u (masked_).
;
; NOTE(review): the shufflevector mask constants below were reconstructed
; (low lanes select the compare result in order, every remaining lane selects
; a lane of the zeroinitializer operand). They agree with the recorded
; assembly but should be confirmed against the upstream copy of this test.

; ---- <16 x i8> compare, result widened to <32 x i1> ----

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; ---- <16 x i8> compare, result widened to <64 x i1> ----

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; ---- <32 x i8> compare, result widened to <64 x i1> ----

define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; ---- <8 x i16> compare, result widened to <16 x i1> ----

define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; ---- <8 x i16> compare, result widened to <32 x i1> ----

define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; ---- <8 x i16> compare, result widened to <64 x i1> ----

define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; ---- <16 x i16> compare, result widened to <32 x i1> ----

define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; ---- <16 x i16> compare, result widened to <64 x i1> ----

define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; ---- <32 x i16> compare, result widened to <64 x i1> ----

define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp eq <32 x i16> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm1
; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpcmpeqw 32(%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp eq <32 x i16> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp eq <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm1
; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpcmpeqw 32(%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp eq <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; ---- <4 x i32> compare, result widened to <8 x i1> ----

define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x 
i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def 
$ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: 
kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> 
undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: 
vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 
= bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd 
(%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; 
NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry 
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } 
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec 
= insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; 
NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def 
$zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, 
%k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 
@test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> 
%__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, 
<64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) 
local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x 
i1> %5 to i4 ret i4 %6 }

; NOTE(review): line structure was restored for the functions below, and the
; shufflevector mask operands (absent from the text under review) were
; reconstructed: widening shuffles take lanes [0, N) from the compare result and
; all remaining lanes from the zeroinitializer operand, %extract.i takes the low
; N lanes of the bitcast %__u, and the *_mem_b splat repeats lane 0. Confirm the
; exact indices against the upstream test before regenerating assertions.

; Equality compare of two <2 x i64> values; the <2 x i1> result is widened to <8 x i1> and returned as i8.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b; result widened to <8 x i1>.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of two <2 x i64> values, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of <2 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <8 x i1>.
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of <2 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of two <2 x i64> values; the <2 x i1> result is widened to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b; result widened to <16 x i1>.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of two <2 x i64> values, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Equality compare of <2 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <16 x i1>.
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of <2 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Equality compare of two <2 x i64> values; the <2 x i1> result is widened to <32 x i1> and returned as i32.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b; result widened to <32 x i1>.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Equality compare of two <2 x i64> values, ANDed with the low lanes of %__u; result widened to <32 x i1>.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <32 x i1>.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Equality compare of <2 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <32 x i1>.
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Equality compare of <2 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <32 x i1>.
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Equality compare of two <2 x i64> values; the <2 x i1> result is widened to <64 x i1> and returned as i64.
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b; result widened to <64 x i1>.
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Equality compare of two <2 x i64> values, ANDed with the low lanes of %__u; result widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Equality compare of <2 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Equality compare of <2 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <64 x i1>.
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Equality compare of <2 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Equality compare of two <4 x i64> values; the <4 x i1> result is widened to <8 x i1> and returned as i8.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of <4 x i64> %__a with a vector loaded from %__b; result widened to <8 x i1>.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of two <4 x i64> values, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of <4 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of <4 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <8 x i1>.
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Equality compare of <4 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Equality compare of two <4 x i64> values; the <4 x i1> result is widened to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of <4 x i64> %__a with a vector loaded from %__b; result widened to <16 x i1>.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of two <4 x i64> values, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Equality compare of <4 x i64> %__a with a vector loaded from %__b, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Equality compare of <4 x i64> %__a with an i64 loaded from %__b and splatted to all lanes; result widened to <16 x i1>.
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Equality compare of <4 x i64> %__a with a splatted i64 load, ANDed with the low lanes of %__u; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq 
%zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; 
NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, 
%k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> 
%__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = 
shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Equality compare of two <8 x i64> register operands. The shuffle keeps the
; 8 compare bits in lanes 0-7 and fills lanes 8-15 from the all-zero vector,
; so the returned i16 is the compare mask zero-extended.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare, with the second operand loaded as a full <8 x i64> from %__b.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Register-operand compare whose <8 x i1> result is ANDed with the i8 mask
; %__u before being widened to 16 bits with zero fill.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked compare with the second operand loaded as a full <8 x i64> from %__b.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Compare against a single i64 loaded from %__b and splatted to all 8 lanes
; (the checked assembly folds this into a {1to8} broadcast operand).
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked form of the splat compare: result ANDed with %__u, then widened.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # 
%bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; <8 x i64> equality compare with the second operand loaded from %__b. The
; shuffle keeps the 8 compare bits in lanes 0-7 and takes lanes 8-31 from the
; all-zero vector, so the returned i32 is the compare mask zero-extended.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Register-operand compare whose <8 x i1> result is ANDed with the i8 mask
; %__u before being widened to 32 bits with zero fill.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked compare with the second operand loaded as a full <8 x i64> from %__b.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Compare against a single i64 loaded from %__b and splatted to all 8 lanes
; (the checked assembly folds this into a {1to8} broadcast operand).
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked form of the splat compare: result ANDed with %__u, then widened.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Register-operand <8 x i64> equality compare widened to a 64-bit mask:
; lanes 0-7 are the compare bits, lanes 8-63 come from the all-zero vector.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: 
test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Register-operand <8 x i64> equality compare whose <8 x i1> result is ANDed
; with the i8 mask %__u; lanes 8-63 of the widened mask come from the all-zero
; vector, so the returned i64 is the masked compare result zero-extended.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked compare with the second operand loaded as a full <8 x i64> from %__b.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Compare against a single i64 loaded from %__b and splatted to all 8 lanes
; (the checked assembly folds this into a {1to8} broadcast operand).
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked form of the splat compare: result ANDed with %__u, then widened.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp eq <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Signed greater-than compare of two <2 x i64> arguments reinterpreted as
; <16 x i8>. Lanes 0-15 of the widened mask are the compare bits; lanes 16-31
; come from the all-zero vector.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp sgt <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Same signed byte compare, with the second operand loaded from %__b.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp sgt <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x 
i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp 
sgt <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; 
NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ; VLX-NEXT: 
kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: ; 
NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: 
; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x 
i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 
@test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw 
(%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define 
zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 
@test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = 
bitcast <8 x i64> %load to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: 
vpcmpgtw (%rsi), %ymm0, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x 
i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; 
NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax 
killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i 
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = 
bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x 
i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, 
%k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa 
(%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: 
# %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; 
VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 
@test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = 
insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 
; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 
killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: 
kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; 
NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd 
%zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext 
i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: 
kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: 
%0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> 
%4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 
@test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, 
%xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 
@test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = 
shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> 
zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = 
shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax 
killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd 
%k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> 
%6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; 
VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, 
<4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, 
%k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd 
%k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 
} define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> 
%3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } 
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x 
i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to 
<8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = 
bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> 
undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; 
VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: 
%0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 
= bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; 
VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b 
%1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b 
%1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext 
i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> 
%6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a 
to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; 
NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgew_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 
@test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 
@test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr 
{ ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq 
%k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: 
vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), 
%ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd 
%ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; 
NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: 
andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1 ; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext 
i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 
@test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: 
vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def 
$zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # 
kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 
@test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = 
load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: 
kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; 
NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd 
(%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, 
<4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 
@test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; 
NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), 
%ymm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def 
$ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; 
NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; 
VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 
x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 
to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> 
%vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> 
zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> 
zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x 
i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed 
$al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; 
VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> 
zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, 
<16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd 
%k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: 
kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; 
VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 
x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 
@test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> 
zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper 
; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; 
VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> 
%4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast 
<4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; 
NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; 
VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 
@test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 
= bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x 
i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = 
bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, 
%zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax 
; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: 
def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = 
bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* 
%__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 ; VLX-NEXT: 
kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, 
%xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry 
; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, 
%ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; VLX: # 
%bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: andl %edi, %ecx ; NoVLX-NEXT: shll $16, %ecx ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw 
%xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; 
VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; 
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 ; 
NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, 
%xmm1 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, 
%zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 ; 
NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, 
%zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 ; 
NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 ; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax ; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp ult <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) 
local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpltuw (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpmaxuw (%rdi), %ymm0, %ymm1
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpmaxuw 32(%rdi), %ymm0, %ymm1
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp ult <32 x i16> %0, %1
  ; Widen the 32 compare bits to 64 lanes; lanes 32..63 select from the zero vector.
  ; NOTE(review): the mask operand was absent in this copy and is reconstructed
  ; from the asserted codegen (upper 32 result bits are zero) - confirm upstream.
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Unsigned less-than compare of two <32 x i16> register operands, ANDed with the
; 32-bit mask %__u, then widened to a 64-bit mask returned as i64.
; NOTE(review): the shufflevector mask below was absent in this copy and is
; reconstructed from the asserted codegen - confirm upstream.
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2
; NoVLX-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
; NoVLX-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT:    vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT:    vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp ult <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  ; Lanes 32..63 select from the zero vector, so the upper half of the result is 0.
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Same as test_masked_vpcmpultw_v32i1_v64i1_mask, but the second compare operand
; is loaded from the pointer %__b.
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpmaxuw (%rsi), %ymm0, %ymm1
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
; NoVLX-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT:    vpmaxuw 32(%rsi), %ymm0, %ymm1
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load =
load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp ult <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  ; Widen the 32 masked compare bits to 64 lanes; lanes 32..63 select from the zero vector.
  ; NOTE(review): the mask operand was absent in this copy and is reconstructed
  ; from the i64 zero-extended result this function returns - confirm upstream.
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Unsigned less-than compare of two <4 x i32> register operands; the 4-bit
; result is widened to an 8-bit mask (upper 4 bits zero) and returned as i8.
; NOTE(review): the shufflevector mask below was absent in this copy and is
; reconstructed from the asserted codegen (kshiftlw/kshiftrw $12 clears the
; upper lanes) - confirm upstream.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  ; Lanes 4..7 select from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Same as test_vpcmpultd_v4i1_v8i1_mask, but the second compare operand is
; loaded from the pointer %__b.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 =
bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  ; Widen the 4 compare bits to 8 lanes; lanes 4..7 select from the zero vector.
  ; NOTE(review): the mask operand was absent in this copy and is reconstructed
  ; from the asserted codegen (upper lanes cleared) - confirm upstream.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Unsigned less-than compare of two <4 x i32> register operands, ANDed with the
; low 4 bits of the mask %__u, widened to an 8-bit mask returned as i8.
; NOTE(review): both shufflevector masks below were absent in this copy and are
; reconstructed from the asserted codegen - confirm upstream.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep the low 4 mask bits.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Lanes 4..7 select from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Same as test_masked_vpcmpultd_v4i1_v8i1_mask, but the second compare operand
; is loaded from the pointer %__b.
; NOTE(review): both shufflevector masks below were absent in this copy and are
; reconstructed from the asserted codegen - confirm upstream.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep the low 4 mask bits.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  ; Lanes 4..7 select from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Unsigned less-than compare of a <4 x i32> register operand against a single
; i32 loaded from %__b and splatted to all four lanes; result widened to i8.
; NOTE(review): both shufflevector masks below were absent in this copy and are
; reconstructed from the asserted codegen ({1to4} broadcast, upper lanes
; cleared) - confirm upstream.
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  ; Splat element 0 to every lane.
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  ; Lanes 4..7 select from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked variant of test_vpcmpultd_v4i1_v8i1_mask_mem_b: the compare result is
; ANDed with the low 4 bits of %__u before being widened to i8.
; NOTE(review): all three shufflevector masks below were absent in this copy
; and are reconstructed from the asserted codegen - confirm upstream.
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  ; Splat element 0 to every lane.
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; Keep the low 4 mask bits.
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  ; Lanes 4..7 select from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Unsigned less-than compare of two <4 x i32> register operands; the 4-bit
; result is widened to a 16-bit mask (upper 12 bits zero) and returned as i16.
; NOTE(review): the shufflevector mask below was absent in this copy. Any mask
; whose lanes 4..15 index the zero vector (indices 4..7) matches the asserted
; codegen; the exact upstream constants should be confirmed.
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp ult <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3
to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 
= bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, 
%zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; VLX: # 
%bb.0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; 
NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to 
i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; 
VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x 
i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector 
<8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load 
<4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; 
NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax 
killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, 
%k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd 
%edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; 
VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 
@test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; VLX: 
# %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> 
undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; 
VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; 
NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, 
%k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = 
bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; 
NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> 
zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* 
%__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* 
%__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> 
zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x 
i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x 
i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x 
i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def 
$zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> 
%4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = 
bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, 
%zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), 
%ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast 
<4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 
def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: 
# %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret 
i64 %6 }

; Unsigned less-than compare of two <8 x i64> vectors. The eight result lanes
; are zero-extended to a 16-lane mask and returned as an i16. Both runs are
; expected to emit a single zmm vpcmpltuq followed by a mask-to-GPR move.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; 8-15 select lanes of the all-zero
  ; second operand, so result lanes 8-15 are zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare as above with the second operand loaded from memory; both runs
; are expected to fold the load into vpcmpltuq.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  ; Indices 0-7 select the compare lanes; 8-15 select zero lanes.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; 
VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 }

; Masked variant with a memory operand: unsigned less-than compare of %__a
; against a loaded <8 x i64>, ANDed with the i8 mask %__u, zero-extended to
; 16 lanes and returned as an i16. Both runs are expected to fold the load and
; the AND into one {%k1}-predicated vpcmpltuq.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  ; Indices 0-7 select the masked compare lanes; 8-15 select lanes of the
  ; all-zero second operand, so result lanes 8-15 are zero.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax 
killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 }

; Masked broadcast variant: a scalar i64 is loaded and splatted to <8 x i64>,
; compared unsigned less-than against %__a, ANDed with the i8 mask %__u,
; zero-extended to 16 lanes and returned as an i16. Both runs are expected to
; use the {1to8} embedded-broadcast form of vpcmpltuq under {%k1}.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  ; All-zero index mask: every lane copies element 0, i.e. a splat.
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  ; Indices 0-7 select the masked compare lanes; 8-15 select zero lanes.
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, 
%eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 
= shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: 
vpcmpltuq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x 
i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast 
<64 x i1> %5 to i64 ret i64 %6 }

declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)

; Ordered-equal compare of two <4 x float> vectors (passed as <2 x i64>). The
; four result lanes are zero-extended to an 8-lane mask and returned as an i8.
; The AVX512F-only run widens the operands to zmm and clears mask bits 4-15
; with a kshiftlw/kshiftrw pair; the AVX512VL run compares directly on xmm.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  ; Indices 0-3 select the compare lanes; 4-7 select lanes of the all-zero
  ; second operand, so result lanes 4-7 are zero.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd 
%edi, %k1 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = 
shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 
x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x 
i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x 
float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x 
i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x 
i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x 
float> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax 
; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x 
i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* 
%__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 
@test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x 
float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, 
<64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = 
shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load 
= load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b 
to <16 x float> %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast <16 x i1> %2 to i16 %4 = zext i16 %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: andl %edi, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = bitcast <16 x i1> %4 to i16 %6 = zext i16 %5 to i32 ret i32 %6 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vcmpoeqps_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to 
<16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast <16 x i1> %2 to i16 %4 = zext i16 %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: andl %edi, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> , i32 8) %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = bitcast <16 x i1> %4 to i16 %6 = zext i16 %5 to i64 ret i64 %6 } declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x 
i1>, i32) define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, 
%xmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = bitcast i2 %__u to <2 x i1> %4 = and <2 x i1> %2, %3 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; VLX: # %bb.0: # %entry ; 
VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = bitcast i2 %__u to <2 x i1> %4 = and <2 x i1> %2, %3 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = bitcast i2 %__u to <2 x i1> %4 = and <2 x i1> %2, %3 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define 
zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $al killed $al killed $eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $al killed $al killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 
@test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; fcmp oeq on two <2 x double> register operands, ANDed with mask %__u; the
; <2 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq
<2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <2 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq of %__a against a splat of the double loaded from %__b, ANDed with
; mask %__u; the <2 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq on two <2 x double> register operands; the <2 x i1> result is
; zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq of %__a against a vector loaded from %__b; the <2 x i1> result is
; zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq of %__a against a splat of the double loaded from %__b; the
; <2 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq on two <2 x double> register operands, ANDed with mask %__u; the
; <2 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <2 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}

; fcmp oeq of %__a against a splat of the double loaded from %__b, ANDed with
; mask %__u; the <2 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; fcmp oeq on two <2 x double> register operands; the <2 x i1> result is
; zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2
x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; fcmp oeq of %__a against a vector loaded from %__b; the <2 x i1> result is
; zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; fcmp oeq of %__a against a splat of the double loaded from %__b; the
; <2 x i1> result is zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to
i32
  ret i32 %4
}

; fcmp oeq on two <2 x double> register operands, ANDed with mask %__u; the
; <2 x i1> result is zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <2 x i1> result is zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; fcmp oeq of %__a against a splat of the double loaded from %__b, ANDed with
; mask %__u; the <2 x i1> result is zero-padded to <32 x i1> and returned as i32.
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; fcmp oeq on two <2 x double> register operands; the <2 x i1> result is
; zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x
double>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; fcmp oeq of %__a against a vector loaded from %__b; the <2 x i1> result is
; zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; fcmp oeq of %__a against a splat of the double loaded from %__b; the
; <2 x i1> result is zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  ; Lanes 0-1 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 =
bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; fcmp oeq on two <2 x double> register operands, ANDed with mask %__u; the
; <2 x i1> result is zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <2 x i1> result is zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 =
bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; fcmp oeq of %__a against a splat of the double loaded from %__b, ANDed with
; mask %__u; the <2 x i1> result is zero-padded to <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  ; Lanes 0-1 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
      i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; fcmp oeq on two <4 x double> register operands; the <4 x i1> result is
; zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %1 = bitcast <4 x i64> %__b to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  ; Lanes 0-3 come from the compare, lanes 4-7 from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; fcmp oeq of %__a against a vector loaded from %__b; the <4 x i1> result is
; zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  ; Lanes 0-3 come from the compare, lanes 4-7 from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; fcmp oeq of %__a against a splat of the double loaded from %__b; the
; <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load double, double* %__b
  %vec = insertelement <4 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x double> %0, %1
  ; Lanes 0-3 come from the compare, lanes 4-7 from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; fcmp oeq on two <4 x double> register operands, ANDed with mask %__u; the
; <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %1 = bitcast <4 x i64> %__b to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Lanes 0-3 come from the masked compare, lanes 4-7 from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Lanes 0-3 come from the masked compare, lanes 4-7 from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq of %__a against a splat of the double loaded from %__b, ANDed with
; mask %__u; the <4 x i1> result is zero-padded to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load double, double* %__b
  %vec = insertelement <4 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; fcmp oeq on two <4 x double> register operands; the <4 x i1> result is
; zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %1 = bitcast <4 x i64> %__b to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  ; Lanes 0-3 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq of %__a against a vector loaded from %__b; the <4 x i1> result is
; zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = shufflevector <4
x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq of %__a against a splat of the double loaded from %__b; the
; <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load double, double* %__b
  %vec = insertelement <4 x double> undef, double %load, i32 0
  ; Splat lane 0 so the scalar is compared against every lane of %__a.
  %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x double> %0, %1
  ; Lanes 0-3 come from the compare, every higher lane from the zero vector.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; fcmp oeq on two <4 x double> register operands, ANDed with mask %__u; the
; <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %1 = bitcast <4 x i64> %__b to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Lanes 0-3 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; fcmp oeq of %__a against a vector loaded from %__b, ANDed with mask %__u;
; the <4 x i1> result is zero-padded to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  ; Lanes 0-3 come from the masked compare, every higher lane from the zero vector.
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
      i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL:
test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: 
vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* 
%__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vcmpoeqpd_v4i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x 
i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = bitcast i4 %__u to <4 x i1> %4 = and <4 x i1> %2, %3 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret 
i16 %4 } define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vcmpoeqpd_v8i1_v16i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: movzbl %al, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8) %3 = bitcast <8 x i1> %2 to i8 %4 = zext i8 %3 to i16 ret i16 %4 } define zeroext i16 
@test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: andb %dil, %al ; VLX-NEXT: movzbl %al, %eax ; VLX-NEXT: # kill: def $ax killed $ax killed $eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andb %dil, %al ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8) %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = bitcast <8 x i1> %4 to i8 %6 = zext i8 %5 to i16 ret i16 %6 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), 
%zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 
0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8) %3 = bitcast <8 x i1> %2 to i8 %4 = zext i8 %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: andb %dil, %al ; VLX-NEXT: movzbl %al, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andb %dil, %al ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8) %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = 
bitcast <8 x i1> %4 to i8 %6 = zext i8 %5 to i32 ret i32 %6 } define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast 
<8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; VLX: # %bb.0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , 
i32 8) %3 = bitcast <8 x i1> %2 to i8 %4 = zext i8 %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask: ; VLX: # %bb.0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: andb %dil, %al ; VLX-NEXT: movzbl %al, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andb %dil, %al ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> , i32 8) %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = bitcast <8 x i1> %4 to i8 %6 = zext i8 %5 to i64 ret i64 %6 } ; Test that we understand that cmpps with rounding zeros the upper bits of the mask register. 
; The 16-lane compare result is widened to 32 lanes by concatenating it with a
; zero vector and then returned as an i32. Both runs expect only the SAE
; compare followed by a single kmov, i.e. no extra instruction is needed to
; clear the upper 16 bits of the result.
define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
; VLX-LABEL: test_cmpm_rnd_zero:
; VLX:       # %bb.0:
; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_cmpm_rnd_zero:
; NoVLX:       # %bb.0:
; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
  ; All-true mask operand: the expected compare carries no {%k} write-mask.
  ; The immediates 2 and 8 line up with the `le` predicate and `{sae}` in the
  ; expected assembly.
  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %1 = bitcast <16 x i1> %res to i16
  %cast = bitcast i16 %1 to <16 x i1>
  ; Lanes 0-15 select the compare result; lanes 16-31 select from the
  ; zeroinitializer operand, so the upper half of the i32 is known zero.
  %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cast2 = bitcast <32 x i1> %shuffle to i32
  ret i32 %cast2
}

; A 4-lane compare result is placed in the upper four lanes of an 8-lane mask
; whose lower four lanes are zero. Both runs expect this to be done with a
; single mask-register left shift by 4 (kshiftlb with the VLX feature set,
; kshiftlw on a widened zmm compare without it).
define i8 @mask_zero_lower(<4 x i32> %a) {
; VLX-LABEL: mask_zero_lower:
; VLX:       # %bb.0:
; VLX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; VLX-NEXT:    kshiftlb $4, %k0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: mask_zero_lower:
; NoVLX:       # %bb.0:
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $4, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
  %cmp = icmp ne <4 x i32> %a, zeroinitializer
  ; Indices 4-7 address the zeroinitializer operand and indices 0-3 address
  ; %cmp, so the result is <0, 0, 0, 0, cmp0, cmp1, cmp2, cmp3>.
  %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %cast = bitcast <8 x i1> %concat to i8
  ret i8 %cast
}