1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX 4 5define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: 7; VLX: # %bb.0: # %entry 8; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 9; VLX-NEXT: kmovd %k0, %eax 10; VLX-NEXT: retq 11; 12; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: 13; NoVLX: # %bb.0: # %entry 14; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 15; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 16; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 17; NoVLX-NEXT: kmovw %k0, %eax 18; NoVLX-NEXT: vzeroupper 19; NoVLX-NEXT: retq 20entry: 21 %0 = bitcast <2 x i64> %__a to <16 x i8> 22 %1 = bitcast <2 x i64> %__b to <16 x i8> 23 %2 = icmp eq <16 x i8> %0, %1 24 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 25 %4 = bitcast <32 x i1> %3 to i32 26 ret i32 %4 27} 28 29define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 30; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem: 31; VLX: # %bb.0: # %entry 32; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0 33; VLX-NEXT: kmovd %k0, %eax 34; VLX-NEXT: retq 35; 36; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem: 37; NoVLX: # %bb.0: # %entry 38; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 39; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 40; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 41; NoVLX-NEXT: kmovw %k0, %eax 42; NoVLX-NEXT: 
vzeroupper 43; NoVLX-NEXT: retq 44entry: 45 %0 = bitcast <2 x i64> %__a to <16 x i8> 46 %load = load <2 x i64>, <2 x i64>* %__b 47 %1 = bitcast <2 x i64> %load to <16 x i8> 48 %2 = icmp eq <16 x i8> %0, %1 49 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 50 %4 = bitcast <32 x i1> %3 to i32 51 ret i32 %4 52} 53 54define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 55; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask: 56; VLX: # %bb.0: # %entry 57; VLX-NEXT: kmovd %edi, %k1 58; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} 59; VLX-NEXT: kmovd %k0, %eax 60; VLX-NEXT: retq 61; 62; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask: 63; NoVLX: # %bb.0: # %entry 64; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 65; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 66; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 67; NoVLX-NEXT: kmovw %k0, %eax 68; NoVLX-NEXT: andl %edi, %eax 69; NoVLX-NEXT: vzeroupper 70; NoVLX-NEXT: retq 71entry: 72 %0 = bitcast <2 x i64> %__a to <16 x i8> 73 %1 = bitcast <2 x i64> %__b to <16 x i8> 74 %2 = icmp eq <16 x i8> %0, %1 75 %3 = bitcast i16 %__u to <16 x i1> 76 %4 = and <16 x i1> %2, %3 77 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 78 %6 = bitcast <32 x i1> %5 to i32 79 ret i32 %6 80} 81 82define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 83; VLX-LABEL: 
test_masked_vpcmpeqb_v16i1_v32i1_mask_mem: 84; VLX: # %bb.0: # %entry 85; VLX-NEXT: kmovd %edi, %k1 86; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1} 87; VLX-NEXT: kmovd %k0, %eax 88; VLX-NEXT: retq 89; 90; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem: 91; NoVLX: # %bb.0: # %entry 92; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 93; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 94; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 95; NoVLX-NEXT: kmovw %k0, %eax 96; NoVLX-NEXT: andl %edi, %eax 97; NoVLX-NEXT: vzeroupper 98; NoVLX-NEXT: retq 99entry: 100 %0 = bitcast <2 x i64> %__a to <16 x i8> 101 %load = load <2 x i64>, <2 x i64>* %__b 102 %1 = bitcast <2 x i64> %load to <16 x i8> 103 %2 = icmp eq <16 x i8> %0, %1 104 %3 = bitcast i16 %__u to <16 x i1> 105 %4 = and <16 x i1> %2, %3 106 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 107 %6 = bitcast <32 x i1> %5 to i32 108 ret i32 %6 109} 110 111 112define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 113; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask: 114; VLX: # %bb.0: # %entry 115; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 116; VLX-NEXT: kmovq %k0, %rax 117; VLX-NEXT: retq 118; 119; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask: 120; NoVLX: # %bb.0: # %entry 121; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 122; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 123; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 124; NoVLX-NEXT: kmovw %k0, %eax 125; NoVLX-NEXT: movzwl %ax, %eax 126; NoVLX-NEXT: vzeroupper 127; NoVLX-NEXT: retq 128entry: 129 %0 = bitcast <2 x i64> %__a to <16 x i8> 130 %1 = bitcast <2 x i64> %__b to <16 x i8> 131 %2 = icmp eq <16 x i8> %0, %1 132 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 
2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 133 %4 = bitcast <64 x i1> %3 to i64 134 ret i64 %4 135} 136 137define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 138; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem: 139; VLX: # %bb.0: # %entry 140; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0 141; VLX-NEXT: kmovq %k0, %rax 142; VLX-NEXT: retq 143; 144; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem: 145; NoVLX: # %bb.0: # %entry 146; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 147; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 148; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 149; NoVLX-NEXT: kmovw %k0, %eax 150; NoVLX-NEXT: movzwl %ax, %eax 151; NoVLX-NEXT: vzeroupper 152; NoVLX-NEXT: retq 153entry: 154 %0 = bitcast <2 x i64> %__a to <16 x i8> 155 %load = load <2 x i64>, <2 x i64>* %__b 156 %1 = bitcast <2 x i64> %load to <16 x i8> 157 %2 = icmp eq <16 x i8> %0, %1 158 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 159 %4 = bitcast <64 x i1> %3 to i64 160 ret i64 %4 161} 162 163define 
zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 164; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask: 165; VLX: # %bb.0: # %entry 166; VLX-NEXT: kmovd %edi, %k1 167; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} 168; VLX-NEXT: kmovq %k0, %rax 169; VLX-NEXT: retq 170; 171; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask: 172; NoVLX: # %bb.0: # %entry 173; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 174; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 175; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 176; NoVLX-NEXT: kmovw %k0, %eax 177; NoVLX-NEXT: andl %edi, %eax 178; NoVLX-NEXT: vzeroupper 179; NoVLX-NEXT: retq 180entry: 181 %0 = bitcast <2 x i64> %__a to <16 x i8> 182 %1 = bitcast <2 x i64> %__b to <16 x i8> 183 %2 = icmp eq <16 x i8> %0, %1 184 %3 = bitcast i16 %__u to <16 x i1> 185 %4 = and <16 x i1> %2, %3 186 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 187 %6 = bitcast <64 x i1> %5 to i64 188 ret i64 %6 189} 190 191define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 192; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem: 193; VLX: # %bb.0: # %entry 194; VLX-NEXT: kmovd %edi, %k1 195; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1} 196; VLX-NEXT: kmovq %k0, %rax 197; VLX-NEXT: retq 198; 199; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem: 200; NoVLX: # %bb.0: # %entry 201; NoVLX-NEXT: vpcmpeqb (%rsi), 
%xmm0, %xmm0 202; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 203; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 204; NoVLX-NEXT: kmovw %k0, %eax 205; NoVLX-NEXT: andl %edi, %eax 206; NoVLX-NEXT: vzeroupper 207; NoVLX-NEXT: retq 208entry: 209 %0 = bitcast <2 x i64> %__a to <16 x i8> 210 %load = load <2 x i64>, <2 x i64>* %__b 211 %1 = bitcast <2 x i64> %load to <16 x i8> 212 %2 = icmp eq <16 x i8> %0, %1 213 %3 = bitcast i16 %__u to <16 x i1> 214 %4 = and <16 x i1> %2, %3 215 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 216 %6 = bitcast <64 x i1> %5 to i64 217 ret i64 %6 218} 219 220 221define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 222; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: 223; VLX: # %bb.0: # %entry 224; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 225; VLX-NEXT: kmovq %k0, %rax 226; VLX-NEXT: vzeroupper 227; VLX-NEXT: retq 228; 229; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: 230; NoVLX: # %bb.0: # %entry 231; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 232; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 233; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 234; NoVLX-NEXT: kmovw %k0, %ecx 235; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 236; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 237; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 238; NoVLX-NEXT: kmovw %k0, %eax 239; NoVLX-NEXT: shll $16, %eax 240; NoVLX-NEXT: orl %ecx, %eax 241; NoVLX-NEXT: vzeroupper 242; NoVLX-NEXT: retq 243entry: 244 %0 = bitcast <4 x i64> %__a to <32 x i8> 245 %1 = 
bitcast <4 x i64> %__b to <32 x i8> 246 %2 = icmp eq <32 x i8> %0, %1 247 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 248 %4 = bitcast <64 x i1> %3 to i64 249 ret i64 %4 250} 251 252define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 253; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: 254; VLX: # %bb.0: # %entry 255; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 256; VLX-NEXT: kmovq %k0, %rax 257; VLX-NEXT: vzeroupper 258; VLX-NEXT: retq 259; 260; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: 261; NoVLX: # %bb.0: # %entry 262; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 263; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 264; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 265; NoVLX-NEXT: kmovw %k0, %ecx 266; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 267; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 268; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 269; NoVLX-NEXT: kmovw %k0, %eax 270; NoVLX-NEXT: shll $16, %eax 271; NoVLX-NEXT: orl %ecx, %eax 272; NoVLX-NEXT: vzeroupper 273; NoVLX-NEXT: retq 274entry: 275 %0 = bitcast <4 x i64> %__a to <32 x i8> 276 %load = load <4 x i64>, <4 x i64>* %__b 277 %1 = bitcast <4 x i64> %load to <32 x i8> 278 %2 = icmp eq <32 x i8> %0, %1 279 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, 
i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 280 %4 = bitcast <64 x i1> %3 to i64 281 ret i64 %4 282} 283 284define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 285; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: 286; VLX: # %bb.0: # %entry 287; VLX-NEXT: kmovd %edi, %k1 288; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} 289; VLX-NEXT: kmovq %k0, %rax 290; VLX-NEXT: vzeroupper 291; VLX-NEXT: retq 292; 293; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: 294; NoVLX: # %bb.0: # %entry 295; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 296; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 297; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 298; NoVLX-NEXT: kmovw %k0, %eax 299; NoVLX-NEXT: andl %edi, %eax 300; NoVLX-NEXT: shrl $16, %edi 301; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 302; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 303; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 304; NoVLX-NEXT: kmovw %k0, %ecx 305; NoVLX-NEXT: andl %edi, %ecx 306; NoVLX-NEXT: shll $16, %ecx 307; NoVLX-NEXT: movzwl %ax, %eax 308; NoVLX-NEXT: orl %ecx, %eax 309; NoVLX-NEXT: vzeroupper 310; NoVLX-NEXT: retq 311entry: 312 %0 = bitcast <4 x i64> %__a to <32 x i8> 313 %1 = bitcast <4 x i64> %__b to <32 x i8> 314 %2 = icmp eq <32 x i8> %0, %1 315 %3 = bitcast i32 %__u to <32 x i1> 316 %4 = and <32 x i1> %2, %3 317 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, 
i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 318 %6 = bitcast <64 x i1> %5 to i64 319 ret i64 %6 320} 321 322define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 323; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: 324; VLX: # %bb.0: # %entry 325; VLX-NEXT: kmovd %edi, %k1 326; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1} 327; VLX-NEXT: kmovq %k0, %rax 328; VLX-NEXT: vzeroupper 329; VLX-NEXT: retq 330; 331; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: 332; NoVLX: # %bb.0: # %entry 333; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 334; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 335; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 336; NoVLX-NEXT: kmovw %k0, %eax 337; NoVLX-NEXT: andl %edi, %eax 338; NoVLX-NEXT: shrl $16, %edi 339; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 340; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 341; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 342; NoVLX-NEXT: kmovw %k0, %ecx 343; NoVLX-NEXT: andl %edi, %ecx 344; NoVLX-NEXT: shll $16, %ecx 345; NoVLX-NEXT: movzwl %ax, %eax 346; NoVLX-NEXT: orl %ecx, %eax 347; NoVLX-NEXT: vzeroupper 348; NoVLX-NEXT: retq 349entry: 350 %0 = bitcast <4 x i64> %__a to <32 x i8> 351 %load = load <4 x i64>, <4 x i64>* %__b 352 %1 = bitcast <4 x i64> %load to <32 x i8> 353 %2 = icmp eq <32 x i8> %0, %1 354 %3 = bitcast i32 %__u to <32 x i1> 355 %4 = and <32 x i1> %2, %3 356 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 
47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 357 %6 = bitcast <64 x i1> %5 to i64 358 ret i64 %6 359} 360 361 362define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 363; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask: 364; VLX: # %bb.0: # %entry 365; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 366; VLX-NEXT: kmovd %k0, %eax 367; VLX-NEXT: # kill: def $ax killed $ax killed $eax 368; VLX-NEXT: retq 369; 370; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask: 371; NoVLX: # %bb.0: # %entry 372; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 373; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 374; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 375; NoVLX-NEXT: kmovw %k0, %eax 376; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 377; NoVLX-NEXT: vzeroupper 378; NoVLX-NEXT: retq 379entry: 380 %0 = bitcast <2 x i64> %__a to <8 x i16> 381 %1 = bitcast <2 x i64> %__b to <8 x i16> 382 %2 = icmp eq <8 x i16> %0, %1 383 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 384 %4 = bitcast <16 x i1> %3 to i16 385 ret i16 %4 386} 387 388define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 389; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem: 390; VLX: # %bb.0: # %entry 391; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 392; VLX-NEXT: kmovd %k0, %eax 393; VLX-NEXT: # kill: def $ax killed $ax killed $eax 394; VLX-NEXT: retq 395; 396; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem: 397; NoVLX: # %bb.0: # %entry 398; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 399; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 400; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 401; NoVLX-NEXT: kmovw %k0, %eax 402; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 403; NoVLX-NEXT: vzeroupper 404; NoVLX-NEXT: retq 405entry: 406 %0 = bitcast <2 x i64> 
%__a to <8 x i16> 407 %load = load <2 x i64>, <2 x i64>* %__b 408 %1 = bitcast <2 x i64> %load to <8 x i16> 409 %2 = icmp eq <8 x i16> %0, %1 410 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 411 %4 = bitcast <16 x i1> %3 to i16 412 ret i16 %4 413} 414 415define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 416; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask: 417; VLX: # %bb.0: # %entry 418; VLX-NEXT: kmovd %edi, %k1 419; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} 420; VLX-NEXT: kmovd %k0, %eax 421; VLX-NEXT: # kill: def $ax killed $ax killed $eax 422; VLX-NEXT: retq 423; 424; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask: 425; NoVLX: # %bb.0: # %entry 426; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 427; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 428; NoVLX-NEXT: kmovw %edi, %k1 429; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 430; NoVLX-NEXT: kmovw %k0, %eax 431; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 432; NoVLX-NEXT: vzeroupper 433; NoVLX-NEXT: retq 434entry: 435 %0 = bitcast <2 x i64> %__a to <8 x i16> 436 %1 = bitcast <2 x i64> %__b to <8 x i16> 437 %2 = icmp eq <8 x i16> %0, %1 438 %3 = bitcast i8 %__u to <8 x i1> 439 %4 = and <8 x i1> %2, %3 440 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 441 %6 = bitcast <16 x i1> %5 to i16 442 ret i16 %6 443} 444 445define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 446; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem: 447; VLX: # %bb.0: # %entry 448; VLX-NEXT: kmovd %edi, %k1 449; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} 450; VLX-NEXT: kmovd %k0, %eax 451; VLX-NEXT: # kill: 
def $ax killed $ax killed $eax 452; VLX-NEXT: retq 453; 454; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem: 455; NoVLX: # %bb.0: # %entry 456; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 457; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 458; NoVLX-NEXT: kmovw %edi, %k1 459; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 460; NoVLX-NEXT: kmovw %k0, %eax 461; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 462; NoVLX-NEXT: vzeroupper 463; NoVLX-NEXT: retq 464entry: 465 %0 = bitcast <2 x i64> %__a to <8 x i16> 466 %load = load <2 x i64>, <2 x i64>* %__b 467 %1 = bitcast <2 x i64> %load to <8 x i16> 468 %2 = icmp eq <8 x i16> %0, %1 469 %3 = bitcast i8 %__u to <8 x i1> 470 %4 = and <8 x i1> %2, %3 471 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 472 %6 = bitcast <16 x i1> %5 to i16 473 ret i16 %6 474} 475 476 477define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 478; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: 479; VLX: # %bb.0: # %entry 480; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 481; VLX-NEXT: kmovd %k0, %eax 482; VLX-NEXT: retq 483; 484; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: 485; NoVLX: # %bb.0: # %entry 486; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 487; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 488; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 489; NoVLX-NEXT: kmovw %k0, %eax 490; NoVLX-NEXT: vzeroupper 491; NoVLX-NEXT: retq 492entry: 493 %0 = bitcast <2 x i64> %__a to <8 x i16> 494 %1 = bitcast <2 x i64> %__b to <8 x i16> 495 %2 = icmp eq <8 x i16> %0, %1 496 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 497 %4 = bitcast <32 
x i1> %3 to i32 498 ret i32 %4 499} 500 501define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 502; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: 503; VLX: # %bb.0: # %entry 504; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 505; VLX-NEXT: kmovd %k0, %eax 506; VLX-NEXT: retq 507; 508; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: 509; NoVLX: # %bb.0: # %entry 510; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 511; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 512; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 513; NoVLX-NEXT: kmovw %k0, %eax 514; NoVLX-NEXT: vzeroupper 515; NoVLX-NEXT: retq 516entry: 517 %0 = bitcast <2 x i64> %__a to <8 x i16> 518 %load = load <2 x i64>, <2 x i64>* %__b 519 %1 = bitcast <2 x i64> %load to <8 x i16> 520 %2 = icmp eq <8 x i16> %0, %1 521 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 522 %4 = bitcast <32 x i1> %3 to i32 523 ret i32 %4 524} 525 526define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 527; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: 528; VLX: # %bb.0: # %entry 529; VLX-NEXT: kmovd %edi, %k1 530; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} 531; VLX-NEXT: kmovd %k0, %eax 532; VLX-NEXT: retq 533; 534; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: 535; NoVLX: # %bb.0: # %entry 536; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 537; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 538; NoVLX-NEXT: kmovw %edi, %k1 539; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 540; NoVLX-NEXT: kmovw %k0, %eax 541; NoVLX-NEXT: vzeroupper 542; NoVLX-NEXT: retq 543entry: 544 %0 = bitcast <2 x i64> %__a to <8 x i16> 545 %1 = bitcast <2 x i64> %__b to <8 x i16> 546 %2 = icmp eq <8 x i16> %0, 
%1 547 %3 = bitcast i8 %__u to <8 x i1> 548 %4 = and <8 x i1> %2, %3 549 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 550 %6 = bitcast <32 x i1> %5 to i32 551 ret i32 %6 552} 553 554define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 555; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: 556; VLX: # %bb.0: # %entry 557; VLX-NEXT: kmovd %edi, %k1 558; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} 559; VLX-NEXT: kmovd %k0, %eax 560; VLX-NEXT: retq 561; 562; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: 563; NoVLX: # %bb.0: # %entry 564; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 565; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 566; NoVLX-NEXT: kmovw %edi, %k1 567; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 568; NoVLX-NEXT: kmovw %k0, %eax 569; NoVLX-NEXT: vzeroupper 570; NoVLX-NEXT: retq 571entry: 572 %0 = bitcast <2 x i64> %__a to <8 x i16> 573 %load = load <2 x i64>, <2 x i64>* %__b 574 %1 = bitcast <2 x i64> %load to <8 x i16> 575 %2 = icmp eq <8 x i16> %0, %1 576 %3 = bitcast i8 %__u to <8 x i1> 577 %4 = and <8 x i1> %2, %3 578 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 579 %6 = bitcast <32 x i1> %5 to i32 580 ret i32 %6 581} 582 583 584define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 585; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: 586; VLX: # %bb.0: # %entry 587; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 588; 
VLX-NEXT: kmovq %k0, %rax 589; VLX-NEXT: retq 590; 591; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: 592; NoVLX: # %bb.0: # %entry 593; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 594; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 595; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 596; NoVLX-NEXT: kmovw %k0, %eax 597; NoVLX-NEXT: movzwl %ax, %eax 598; NoVLX-NEXT: vzeroupper 599; NoVLX-NEXT: retq 600entry: 601 %0 = bitcast <2 x i64> %__a to <8 x i16> 602 %1 = bitcast <2 x i64> %__b to <8 x i16> 603 %2 = icmp eq <8 x i16> %0, %1 604 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 605 %4 = bitcast <64 x i1> %3 to i64 606 ret i64 %4 607} 608 609define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 610; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: 611; VLX: # %bb.0: # %entry 612; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 613; VLX-NEXT: kmovq %k0, %rax 614; VLX-NEXT: retq 615; 616; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: 617; NoVLX: # %bb.0: # %entry 618; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 619; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 620; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 621; NoVLX-NEXT: kmovw %k0, %eax 622; NoVLX-NEXT: movzwl %ax, %eax 623; NoVLX-NEXT: vzeroupper 624; NoVLX-NEXT: retq 625entry: 626 %0 = bitcast <2 x i64> %__a to <8 x i16> 627 %load = load <2 x i64>, <2 x i64>* %__b 628 %1 = bitcast <2 x i64> %load to <8 x i16> 629 %2 = icmp eq <8 x i16> %0, %1 630 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, 
<64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 631 %4 = bitcast <64 x i1> %3 to i64 632 ret i64 %4 633} 634 635define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 636; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: 637; VLX: # %bb.0: # %entry 638; VLX-NEXT: kmovd %edi, %k1 639; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} 640; VLX-NEXT: kmovq %k0, %rax 641; VLX-NEXT: retq 642; 643; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: 644; NoVLX: # %bb.0: # %entry 645; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 646; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 647; NoVLX-NEXT: kmovw %edi, %k1 648; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 649; NoVLX-NEXT: kmovw %k0, %eax 650; NoVLX-NEXT: movzwl %ax, %eax 651; NoVLX-NEXT: vzeroupper 652; NoVLX-NEXT: retq 653entry: 654 %0 = bitcast <2 x i64> %__a to <8 x i16> 655 %1 = bitcast <2 x i64> %__b to <8 x i16> 656 %2 = icmp eq <8 x i16> %0, %1 657 %3 = bitcast i8 %__u to <8 x i1> 658 %4 = and <8 x i1> %2, %3 659 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 660 %6 = bitcast <64 x i1> %5 to i64 661 ret i64 %6 662} 663 664define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 665; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: 666; VLX: # %bb.0: # %entry 667; VLX-NEXT: kmovd %edi, %k1 668; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} 669; VLX-NEXT: kmovq %k0, %rax 670; VLX-NEXT: retq 671; 672; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: 673; NoVLX: # %bb.0: # %entry 674; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 675; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 676; NoVLX-NEXT: kmovw %edi, %k1 677; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 678; NoVLX-NEXT: kmovw %k0, %eax 679; NoVLX-NEXT: movzwl %ax, %eax 680; NoVLX-NEXT: vzeroupper 681; NoVLX-NEXT: retq 682entry: 683 %0 = bitcast <2 x i64> %__a to <8 x i16> 684 %load = load <2 x i64>, <2 x i64>* %__b 685 %1 = bitcast <2 x i64> %load to <8 x i16> 686 %2 = icmp eq <8 x i16> %0, %1 687 %3 = bitcast i8 %__u to <8 x i1> 688 %4 = and <8 x i1> %2, %3 689 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 690 %6 = bitcast <64 x i1> %5 to i64 691 ret i64 %6 692} 693 694 695define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 696; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: 697; VLX: # %bb.0: # %entry 698; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 699; VLX-NEXT: kmovd %k0, %eax 700; VLX-NEXT: vzeroupper 
701; VLX-NEXT: retq 702; 703; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: 704; NoVLX: # %bb.0: # %entry 705; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 706; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 707; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 708; NoVLX-NEXT: kmovw %k0, %eax 709; NoVLX-NEXT: vzeroupper 710; NoVLX-NEXT: retq 711entry: 712 %0 = bitcast <4 x i64> %__a to <16 x i16> 713 %1 = bitcast <4 x i64> %__b to <16 x i16> 714 %2 = icmp eq <16 x i16> %0, %1 715 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 716 %4 = bitcast <32 x i1> %3 to i32 717 ret i32 %4 718} 719 720define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 721; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: 722; VLX: # %bb.0: # %entry 723; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 724; VLX-NEXT: kmovd %k0, %eax 725; VLX-NEXT: vzeroupper 726; VLX-NEXT: retq 727; 728; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: 729; NoVLX: # %bb.0: # %entry 730; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 731; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 732; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 733; NoVLX-NEXT: kmovw %k0, %eax 734; NoVLX-NEXT: vzeroupper 735; NoVLX-NEXT: retq 736entry: 737 %0 = bitcast <4 x i64> %__a to <16 x i16> 738 %load = load <4 x i64>, <4 x i64>* %__b 739 %1 = bitcast <4 x i64> %load to <16 x i16> 740 %2 = icmp eq <16 x i16> %0, %1 741 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 742 %4 = bitcast <32 x i1> %3 to i32 743 
ret i32 %4 744} 745 746define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 747; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: 748; VLX: # %bb.0: # %entry 749; VLX-NEXT: kmovd %edi, %k1 750; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} 751; VLX-NEXT: kmovd %k0, %eax 752; VLX-NEXT: vzeroupper 753; VLX-NEXT: retq 754; 755; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: 756; NoVLX: # %bb.0: # %entry 757; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 758; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 759; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 760; NoVLX-NEXT: kmovw %k0, %eax 761; NoVLX-NEXT: andl %edi, %eax 762; NoVLX-NEXT: vzeroupper 763; NoVLX-NEXT: retq 764entry: 765 %0 = bitcast <4 x i64> %__a to <16 x i16> 766 %1 = bitcast <4 x i64> %__b to <16 x i16> 767 %2 = icmp eq <16 x i16> %0, %1 768 %3 = bitcast i16 %__u to <16 x i1> 769 %4 = and <16 x i1> %2, %3 770 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 771 %6 = bitcast <32 x i1> %5 to i32 772 ret i32 %6 773} 774 775define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 776; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: 777; VLX: # %bb.0: # %entry 778; VLX-NEXT: kmovd %edi, %k1 779; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} 780; VLX-NEXT: kmovd %k0, %eax 781; VLX-NEXT: vzeroupper 782; VLX-NEXT: retq 783; 784; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: 785; NoVLX: # %bb.0: # %entry 786; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 787; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 788; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 789; NoVLX-NEXT: kmovw %k0, %eax 790; NoVLX-NEXT: andl %edi, %eax 791; 
NoVLX-NEXT: vzeroupper 792; NoVLX-NEXT: retq 793entry: 794 %0 = bitcast <4 x i64> %__a to <16 x i16> 795 %load = load <4 x i64>, <4 x i64>* %__b 796 %1 = bitcast <4 x i64> %load to <16 x i16> 797 %2 = icmp eq <16 x i16> %0, %1 798 %3 = bitcast i16 %__u to <16 x i1> 799 %4 = and <16 x i1> %2, %3 800 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 801 %6 = bitcast <32 x i1> %5 to i32 802 ret i32 %6 803} 804 805 806define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 807; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: 808; VLX: # %bb.0: # %entry 809; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 810; VLX-NEXT: kmovq %k0, %rax 811; VLX-NEXT: vzeroupper 812; VLX-NEXT: retq 813; 814; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: 815; NoVLX: # %bb.0: # %entry 816; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 817; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 818; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 819; NoVLX-NEXT: kmovw %k0, %eax 820; NoVLX-NEXT: movzwl %ax, %eax 821; NoVLX-NEXT: vzeroupper 822; NoVLX-NEXT: retq 823entry: 824 %0 = bitcast <4 x i64> %__a to <16 x i16> 825 %1 = bitcast <4 x i64> %__b to <16 x i16> 826 %2 = icmp eq <16 x i16> %0, %1 827 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, 
i32 28, i32 29, i32 30, i32 31> 828 %4 = bitcast <64 x i1> %3 to i64 829 ret i64 %4 830} 831 832define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 833; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: 834; VLX: # %bb.0: # %entry 835; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 836; VLX-NEXT: kmovq %k0, %rax 837; VLX-NEXT: vzeroupper 838; VLX-NEXT: retq 839; 840; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: 841; NoVLX: # %bb.0: # %entry 842; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 843; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 844; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 845; NoVLX-NEXT: kmovw %k0, %eax 846; NoVLX-NEXT: movzwl %ax, %eax 847; NoVLX-NEXT: vzeroupper 848; NoVLX-NEXT: retq 849entry: 850 %0 = bitcast <4 x i64> %__a to <16 x i16> 851 %load = load <4 x i64>, <4 x i64>* %__b 852 %1 = bitcast <4 x i64> %load to <16 x i16> 853 %2 = icmp eq <16 x i16> %0, %1 854 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 855 %4 = bitcast <64 x i1> %3 to i64 856 ret i64 %4 857} 858 859define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 860; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: 861; VLX: # %bb.0: # %entry 862; VLX-NEXT: kmovd %edi, %k1 863; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} 864; VLX-NEXT: kmovq %k0, %rax 865; VLX-NEXT: vzeroupper 866; VLX-NEXT: retq 867; 868; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: 
869; NoVLX: # %bb.0: # %entry 870; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 871; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 872; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 873; NoVLX-NEXT: kmovw %k0, %eax 874; NoVLX-NEXT: andl %edi, %eax 875; NoVLX-NEXT: vzeroupper 876; NoVLX-NEXT: retq 877entry: 878 %0 = bitcast <4 x i64> %__a to <16 x i16> 879 %1 = bitcast <4 x i64> %__b to <16 x i16> 880 %2 = icmp eq <16 x i16> %0, %1 881 %3 = bitcast i16 %__u to <16 x i1> 882 %4 = and <16 x i1> %2, %3 883 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 884 %6 = bitcast <64 x i1> %5 to i64 885 ret i64 %6 886} 887 888define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 889; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: 890; VLX: # %bb.0: # %entry 891; VLX-NEXT: kmovd %edi, %k1 892; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} 893; VLX-NEXT: kmovq %k0, %rax 894; VLX-NEXT: vzeroupper 895; VLX-NEXT: retq 896; 897; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: 898; NoVLX: # %bb.0: # %entry 899; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 900; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 901; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 902; NoVLX-NEXT: kmovw %k0, %eax 903; NoVLX-NEXT: andl %edi, %eax 904; NoVLX-NEXT: vzeroupper 905; NoVLX-NEXT: retq 906entry: 907 %0 = bitcast <4 x i64> %__a to <16 x i16> 908 %load = load <4 x i64>, <4 x i64>* %__b 909 %1 = bitcast <4 x i64> %load to <16 x i16> 910 %2 
= icmp eq <16 x i16> %0, %1 911 %3 = bitcast i16 %__u to <16 x i1> 912 %4 = and <16 x i1> %2, %3 913 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 914 %6 = bitcast <64 x i1> %5 to i64 915 ret i64 %6 916} 917 918 919define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 920; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: 921; VLX: # %bb.0: # %entry 922; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 923; VLX-NEXT: kmovq %k0, %rax 924; VLX-NEXT: vzeroupper 925; VLX-NEXT: retq 926; 927; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: 928; NoVLX: # %bb.0: # %entry 929; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 930; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 931; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 932; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 933; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 934; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 935; NoVLX-NEXT: kmovw %k0, %ecx 936; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 937; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 938; NoVLX-NEXT: kmovw %k0, %eax 939; NoVLX-NEXT: shll $16, %eax 940; NoVLX-NEXT: orl %ecx, %eax 941; NoVLX-NEXT: vzeroupper 942; NoVLX-NEXT: retq 943entry: 944 %0 = bitcast <8 x i64> %__a to <32 x i16> 945 %1 = bitcast <8 x i64> %__b to <32 x i16> 946 %2 = icmp eq <32 x i16> %0, %1 947 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 
16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 948 %4 = bitcast <64 x i1> %3 to i64 949 ret i64 %4 950} 951 952define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 953; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: 954; VLX: # %bb.0: # %entry 955; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0 956; VLX-NEXT: kmovq %k0, %rax 957; VLX-NEXT: vzeroupper 958; VLX-NEXT: retq 959; 960; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: 961; NoVLX: # %bb.0: # %entry 962; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 963; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 964; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 965; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 966; NoVLX-NEXT: kmovw %k0, %ecx 967; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0 968; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 969; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 970; NoVLX-NEXT: kmovw %k0, %eax 971; NoVLX-NEXT: shll $16, %eax 972; NoVLX-NEXT: orl %ecx, %eax 973; NoVLX-NEXT: vzeroupper 974; NoVLX-NEXT: retq 975entry: 976 %0 = bitcast <8 x i64> %__a to <32 x i16> 977 %load = load <8 x i64>, <8 x i64>* %__b 978 %1 = bitcast <8 x i64> %load to <32 x i16> 979 %2 = icmp eq <32 x i16> %0, %1 980 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, 
i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 981 %4 = bitcast <64 x i1> %3 to i64 982 ret i64 %4 983} 984 985define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 986; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: 987; VLX: # %bb.0: # %entry 988; VLX-NEXT: kmovd %edi, %k1 989; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} 990; VLX-NEXT: kmovq %k0, %rax 991; VLX-NEXT: vzeroupper 992; VLX-NEXT: retq 993; 994; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: 995; NoVLX: # %bb.0: # %entry 996; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2 997; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 998; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 999; NoVLX-NEXT: kmovw %k0, %eax 1000; NoVLX-NEXT: andl %edi, %eax 1001; NoVLX-NEXT: shrl $16, %edi 1002; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1003; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1004; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 1005; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 1006; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 1007; NoVLX-NEXT: kmovw %k0, %ecx 1008; NoVLX-NEXT: andl %edi, %ecx 1009; NoVLX-NEXT: shll $16, %ecx 1010; NoVLX-NEXT: movzwl %ax, %eax 1011; NoVLX-NEXT: orl %ecx, %eax 1012; NoVLX-NEXT: vzeroupper 1013; NoVLX-NEXT: retq 1014entry: 1015 %0 = bitcast <8 x i64> %__a to <32 x i16> 1016 %1 = bitcast <8 x i64> %__b to <32 x i16> 1017 %2 = icmp eq <32 x i16> %0, %1 1018 %3 = bitcast i32 %__u to <32 x i1> 1019 %4 = and <32 x i1> %2, %3 1020 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, 
i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 1021 %6 = bitcast <64 x i1> %5 to i64 1022 ret i64 %6 1023} 1024 1025define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 1026; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: 1027; VLX: # %bb.0: # %entry 1028; VLX-NEXT: kmovd %edi, %k1 1029; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1} 1030; VLX-NEXT: kmovq %k0, %rax 1031; VLX-NEXT: vzeroupper 1032; VLX-NEXT: retq 1033; 1034; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: 1035; NoVLX: # %bb.0: # %entry 1036; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1 1037; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 1038; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 1039; NoVLX-NEXT: kmovw %k0, %eax 1040; NoVLX-NEXT: andl %edi, %eax 1041; NoVLX-NEXT: shrl $16, %edi 1042; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1043; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0 1044; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 1045; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 1046; NoVLX-NEXT: kmovw %k0, %ecx 1047; NoVLX-NEXT: andl %edi, %ecx 1048; NoVLX-NEXT: shll $16, %ecx 1049; NoVLX-NEXT: movzwl %ax, %eax 1050; NoVLX-NEXT: orl %ecx, %eax 1051; NoVLX-NEXT: vzeroupper 1052; NoVLX-NEXT: retq 1053entry: 1054 %0 = bitcast <8 x i64> %__a to <32 x i16> 1055 %load = load <8 x i64>, <8 x i64>* %__b 1056 %1 = bitcast <8 x i64> %load to <32 x i16> 1057 %2 = icmp eq <32 x i16> %0, %1 1058 %3 = bitcast i32 %__u to <32 x i1> 1059 %4 = and <32 x i1> %2, %3 1060 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, 
i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 1061 %6 = bitcast <64 x i1> %5 to i64 1062 ret i64 %6 1063} 1064 1065 1066define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1067; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: 1068; VLX: # %bb.0: # %entry 1069; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1070; VLX-NEXT: kmovd %k0, %eax 1071; VLX-NEXT: # kill: def $al killed $al killed $eax 1072; VLX-NEXT: retq 1073; 1074; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: 1075; NoVLX: # %bb.0: # %entry 1076; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1077; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1078; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1079; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1080; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1081; NoVLX-NEXT: kmovw %k0, %eax 1082; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1083; NoVLX-NEXT: vzeroupper 1084; NoVLX-NEXT: retq 1085entry: 1086 %0 = bitcast <2 x i64> %__a to <4 x i32> 1087 %1 = bitcast <2 x i64> %__b to <4 x i32> 1088 %2 = icmp eq <4 x i32> %0, %1 1089 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1090 %4 = bitcast <8 x i1> %3 to i8 1091 ret i8 %4 1092} 1093 1094define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 1095; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: 1096; VLX: # %bb.0: # %entry 1097; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1098; VLX-NEXT: kmovd %k0, %eax 1099; VLX-NEXT: # kill: def $al killed $al killed $eax 1100; VLX-NEXT: retq 1101; 1102; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: 1103; NoVLX: # %bb.0: # %entry 1104; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1105; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1106; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1107; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1108; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1109; 
NoVLX-NEXT: kmovw %k0, %eax 1110; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1111; NoVLX-NEXT: vzeroupper 1112; NoVLX-NEXT: retq 1113entry: 1114 %0 = bitcast <2 x i64> %__a to <4 x i32> 1115 %load = load <2 x i64>, <2 x i64>* %__b 1116 %1 = bitcast <2 x i64> %load to <4 x i32> 1117 %2 = icmp eq <4 x i32> %0, %1 1118 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1119 %4 = bitcast <8 x i1> %3 to i8 1120 ret i8 %4 1121} 1122 1123define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1124; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: 1125; VLX: # %bb.0: # %entry 1126; VLX-NEXT: kmovd %edi, %k1 1127; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1128; VLX-NEXT: kmovd %k0, %eax 1129; VLX-NEXT: # kill: def $al killed $al killed $eax 1130; VLX-NEXT: retq 1131; 1132; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: 1133; NoVLX: # %bb.0: # %entry 1134; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1135; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1136; NoVLX-NEXT: kmovw %edi, %k1 1137; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1138; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1139; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1140; NoVLX-NEXT: kmovw %k0, %eax 1141; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1142; NoVLX-NEXT: vzeroupper 1143; NoVLX-NEXT: retq 1144entry: 1145 %0 = bitcast <2 x i64> %__a to <4 x i32> 1146 %1 = bitcast <2 x i64> %__b to <4 x i32> 1147 %2 = icmp eq <4 x i32> %0, %1 1148 %3 = bitcast i8 %__u to <8 x i1> 1149 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1150 %4 = and <4 x i1> %2, %extract.i 1151 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1152 %6 = bitcast <8 x i1> %5 to i8 1153 ret i8 %6 1154} 1155 1156define zeroext i8 
@test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 1157; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: 1158; VLX: # %bb.0: # %entry 1159; VLX-NEXT: kmovd %edi, %k1 1160; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1161; VLX-NEXT: kmovd %k0, %eax 1162; VLX-NEXT: # kill: def $al killed $al killed $eax 1163; VLX-NEXT: retq 1164; 1165; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: 1166; NoVLX: # %bb.0: # %entry 1167; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1168; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1169; NoVLX-NEXT: kmovw %edi, %k1 1170; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1171; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1172; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1173; NoVLX-NEXT: kmovw %k0, %eax 1174; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1175; NoVLX-NEXT: vzeroupper 1176; NoVLX-NEXT: retq 1177entry: 1178 %0 = bitcast <2 x i64> %__a to <4 x i32> 1179 %load = load <2 x i64>, <2 x i64>* %__b 1180 %1 = bitcast <2 x i64> %load to <4 x i32> 1181 %2 = icmp eq <4 x i32> %0, %1 1182 %3 = bitcast i8 %__u to <8 x i1> 1183 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1184 %4 = and <4 x i1> %2, %extract.i 1185 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1186 %6 = bitcast <8 x i1> %5 to i8 1187 ret i8 %6 1188} 1189 1190 1191define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 1192; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1193; VLX: # %bb.0: # %entry 1194; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1195; VLX-NEXT: kmovd %k0, %eax 1196; VLX-NEXT: # kill: def $al killed $al killed $eax 1197; VLX-NEXT: retq 1198; 1199; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1200; NoVLX: # %bb.0: # %entry 1201; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1202; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 1203; 
NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1204; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1205; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1206; NoVLX-NEXT: kmovw %k0, %eax 1207; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1208; NoVLX-NEXT: vzeroupper 1209; NoVLX-NEXT: retq 1210entry: 1211 %0 = bitcast <2 x i64> %__a to <4 x i32> 1212 %load = load i32, i32* %__b 1213 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1214 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1215 %2 = icmp eq <4 x i32> %0, %1 1216 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1217 %4 = bitcast <8 x i1> %3 to i8 1218 ret i8 %4 1219} 1220 1221define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 1222; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1223; VLX: # %bb.0: # %entry 1224; VLX-NEXT: kmovd %edi, %k1 1225; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1226; VLX-NEXT: kmovd %k0, %eax 1227; VLX-NEXT: # kill: def $al killed $al killed $eax 1228; VLX-NEXT: retq 1229; 1230; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1231; NoVLX: # %bb.0: # %entry 1232; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1233; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 1234; NoVLX-NEXT: kmovw %edi, %k1 1235; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1236; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1237; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1238; NoVLX-NEXT: kmovw %k0, %eax 1239; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1240; NoVLX-NEXT: vzeroupper 1241; NoVLX-NEXT: retq 1242entry: 1243 %0 = bitcast <2 x i64> %__a to <4 x i32> 1244 %load = load i32, i32* %__b 1245 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1246 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1247 %2 = icmp eq <4 x i32> %0, %1 1248 %3 = bitcast i8 %__u to <8 x i1> 1249 %extract.i 
= shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1250 %4 = and <4 x i1> %extract.i, %2 1251 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1252 %6 = bitcast <8 x i1> %5 to i8 1253 ret i8 %6 1254} 1255 1256 1257define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1258; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: 1259; VLX: # %bb.0: # %entry 1260; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1261; VLX-NEXT: kmovd %k0, %eax 1262; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1263; VLX-NEXT: retq 1264; 1265; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: 1266; NoVLX: # %bb.0: # %entry 1267; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1268; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1269; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1270; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1271; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1272; NoVLX-NEXT: kmovw %k0, %eax 1273; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1274; NoVLX-NEXT: vzeroupper 1275; NoVLX-NEXT: retq 1276entry: 1277 %0 = bitcast <2 x i64> %__a to <4 x i32> 1278 %1 = bitcast <2 x i64> %__b to <4 x i32> 1279 %2 = icmp eq <4 x i32> %0, %1 1280 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1281 %4 = bitcast <16 x i1> %3 to i16 1282 ret i16 %4 1283} 1284 1285define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 1286; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: 1287; VLX: # %bb.0: # %entry 1288; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1289; VLX-NEXT: kmovd %k0, %eax 1290; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1291; VLX-NEXT: retq 1292; 1293; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: 1294; NoVLX: # %bb.0: # %entry 1295; NoVLX-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 1296; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1297; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1298; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1299; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1300; NoVLX-NEXT: kmovw %k0, %eax 1301; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1302; NoVLX-NEXT: vzeroupper 1303; NoVLX-NEXT: retq 1304entry: 1305 %0 = bitcast <2 x i64> %__a to <4 x i32> 1306 %load = load <2 x i64>, <2 x i64>* %__b 1307 %1 = bitcast <2 x i64> %load to <4 x i32> 1308 %2 = icmp eq <4 x i32> %0, %1 1309 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1310 %4 = bitcast <16 x i1> %3 to i16 1311 ret i16 %4 1312} 1313 1314define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1315; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: 1316; VLX: # %bb.0: # %entry 1317; VLX-NEXT: kmovd %edi, %k1 1318; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1319; VLX-NEXT: kmovd %k0, %eax 1320; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1321; VLX-NEXT: retq 1322; 1323; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: 1324; NoVLX: # %bb.0: # %entry 1325; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1326; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1327; NoVLX-NEXT: kmovw %edi, %k1 1328; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1329; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1330; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1331; NoVLX-NEXT: kmovw %k0, %eax 1332; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1333; NoVLX-NEXT: vzeroupper 1334; NoVLX-NEXT: retq 1335entry: 1336 %0 = bitcast <2 x i64> %__a to <4 x i32> 1337 %1 = bitcast <2 x i64> %__b to <4 x i32> 1338 %2 = icmp eq <4 x i32> %0, %1 1339 %3 = bitcast i8 %__u to <8 x i1> 1340 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1341 %4 = and <4 x i1> %2, 
%extract.i 1342 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1343 %6 = bitcast <16 x i1> %5 to i16 1344 ret i16 %6 1345} 1346 1347define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 1348; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: 1349; VLX: # %bb.0: # %entry 1350; VLX-NEXT: kmovd %edi, %k1 1351; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1352; VLX-NEXT: kmovd %k0, %eax 1353; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1354; VLX-NEXT: retq 1355; 1356; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: 1357; NoVLX: # %bb.0: # %entry 1358; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1359; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1360; NoVLX-NEXT: kmovw %edi, %k1 1361; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1362; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1363; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1364; NoVLX-NEXT: kmovw %k0, %eax 1365; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1366; NoVLX-NEXT: vzeroupper 1367; NoVLX-NEXT: retq 1368entry: 1369 %0 = bitcast <2 x i64> %__a to <4 x i32> 1370 %load = load <2 x i64>, <2 x i64>* %__b 1371 %1 = bitcast <2 x i64> %load to <4 x i32> 1372 %2 = icmp eq <4 x i32> %0, %1 1373 %3 = bitcast i8 %__u to <8 x i1> 1374 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1375 %4 = and <4 x i1> %2, %extract.i 1376 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1377 %6 = bitcast <16 x i1> %5 to i16 1378 ret i16 %6 1379} 1380 1381 1382define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 1383; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1384; VLX: # %bb.0: # %entry 1385; 
VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1386; VLX-NEXT: kmovd %k0, %eax 1387; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1388; VLX-NEXT: retq 1389; 1390; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1391; NoVLX: # %bb.0: # %entry 1392; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1393; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 1394; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1395; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1396; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1397; NoVLX-NEXT: kmovw %k0, %eax 1398; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1399; NoVLX-NEXT: vzeroupper 1400; NoVLX-NEXT: retq 1401entry: 1402 %0 = bitcast <2 x i64> %__a to <4 x i32> 1403 %load = load i32, i32* %__b 1404 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1405 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1406 %2 = icmp eq <4 x i32> %0, %1 1407 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1408 %4 = bitcast <16 x i1> %3 to i16 1409 ret i16 %4 1410} 1411 1412define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 1413; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1414; VLX: # %bb.0: # %entry 1415; VLX-NEXT: kmovd %edi, %k1 1416; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1417; VLX-NEXT: kmovd %k0, %eax 1418; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1419; VLX-NEXT: retq 1420; 1421; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1422; NoVLX: # %bb.0: # %entry 1423; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1424; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 1425; NoVLX-NEXT: kmovw %edi, %k1 1426; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1427; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1428; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1429; NoVLX-NEXT: kmovw %k0, %eax 1430; NoVLX-NEXT: # 
kill: def $ax killed $ax killed $eax 1431; NoVLX-NEXT: vzeroupper 1432; NoVLX-NEXT: retq 1433entry: 1434 %0 = bitcast <2 x i64> %__a to <4 x i32> 1435 %load = load i32, i32* %__b 1436 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1437 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1438 %2 = icmp eq <4 x i32> %0, %1 1439 %3 = bitcast i8 %__u to <8 x i1> 1440 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1441 %4 = and <4 x i1> %extract.i, %2 1442 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1443 %6 = bitcast <16 x i1> %5 to i16 1444 ret i16 %6 1445} 1446 1447 1448define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1449; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: 1450; VLX: # %bb.0: # %entry 1451; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1452; VLX-NEXT: kmovd %k0, %eax 1453; VLX-NEXT: retq 1454; 1455; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: 1456; NoVLX: # %bb.0: # %entry 1457; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1458; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1459; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1460; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1461; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1462; NoVLX-NEXT: kmovw %k0, %eax 1463; NoVLX-NEXT: vzeroupper 1464; NoVLX-NEXT: retq 1465entry: 1466 %0 = bitcast <2 x i64> %__a to <4 x i32> 1467 %1 = bitcast <2 x i64> %__b to <4 x i32> 1468 %2 = icmp eq <4 x i32> %0, %1 1469 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1470 %4 = bitcast <32 x i1> %3 to i32 1471 ret i32 %4 1472} 
; NOTE(review): the assertion lines in this group come from
; utils/update_llc_test_checks.py (see the note at the top of the file);
; regenerate them with that script instead of editing them by hand.

; Unmasked <4 x i32> equality compare whose right-hand side is loaded from
; memory as <2 x i64> and reinterpreted as <4 x i32>. The 4-lane result is
; widened to <32 x i1>: shuffle indices 0-3 take the compare lanes, indices
; 4-7 take lanes of the zeroinitializer operand, so bits 4-31 of the returned
; i32 are zero. Without AVX512VL the expected code compares on zmm registers
; and keeps only mask bits 0-3 via the kshiftlw/kshiftrw $12 pair.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked register-register variant: the low four bits of the i8 %__u (taken
; by %extract.i from its <8 x i1> view) are ANDed with the <4 x i32> equality
; result before the same zero-padding widening to 32 mask bits. The expected
; code moves %edi into %k1 and uses it as the write mask of the compare.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked variant with the right-hand side loaded from memory (<2 x i64> at
; %__b, viewed as <4 x i32>). With AVX512VL the load is expected to fold into
; the masked compare; without it the expected code loads through vmovdqa
; first and compares on zmm registers.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Broadcast variant: a single i32 loaded from %__b is inserted into lane 0 and
; splatted to all four lanes (shuffle mask of all zeros) before the equality
; compare. With AVX512VL the expected code uses the embedded {1to4} broadcast
; memory operand; without it a separate vpbroadcastd is expected.
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked broadcast variant. Note the AND operand order here is
; (%extract.i, %2), the reverse of the non-broadcast masked tests in this
; group; the expected code is the masked compare in either order.
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Unmasked register-register <4 x i32> equality compare widened to <64 x i1>
; and returned as i64. As in the 32-bit tests, only shuffle indices 0-3 read
; the compare result; every other lane reads the zeroinitializer operand.
; With AVX512VL the mask is expected to be read with kmovq; without it the
; expected code reads it with kmovw and zero-extends with movzwl.
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; 64-bit-result variant with the right-hand side loaded from memory
; (<2 x i64> at %__b, viewed as <4 x i32>).
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked register-register variant with a 64-bit result: the low four bits of
; %__u gate the <4 x i32> equality result, which is then zero-padded to
; <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}
1714define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 1715; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: 1716; VLX: # %bb.0: # %entry 1717; VLX-NEXT: kmovd %edi, %k1 1718; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1719; VLX-NEXT: kmovq %k0, %rax 1720; VLX-NEXT: retq 1721; 1722; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: 1723; NoVLX: # %bb.0: # %entry 1724; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1725; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1726; NoVLX-NEXT: kmovw %edi, %k1 1727; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1728; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1729; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1730; NoVLX-NEXT: kmovw %k0, %eax 1731; NoVLX-NEXT: movzwl %ax, %eax 1732; NoVLX-NEXT: vzeroupper 1733; NoVLX-NEXT: retq 1734entry: 1735 %0 = bitcast <2 x i64> %__a to <4 x i32> 1736 %load = load <2 x i64>, <2 x i64>* %__b 1737 %1 = bitcast <2 x i64> %load to <4 x i32> 1738 %2 = icmp eq <4 x i32> %0, %1 1739 %3 = bitcast i8 %__u to <8 x i1> 1740 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1741 %4 = and <4 x i1> %2, %extract.i 1742 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1743 %6 = bitcast <64 x i1> %5 to i64 1744 ret i64 %6 1745} 1746 1747 1748define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 1749; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1750; VLX: # %bb.0: # %entry 1751; 
VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1752; VLX-NEXT: kmovq %k0, %rax 1753; VLX-NEXT: retq 1754; 1755; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1756; NoVLX: # %bb.0: # %entry 1757; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1758; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 1759; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1760; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1761; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1762; NoVLX-NEXT: kmovw %k0, %eax 1763; NoVLX-NEXT: movzwl %ax, %eax 1764; NoVLX-NEXT: vzeroupper 1765; NoVLX-NEXT: retq 1766entry: 1767 %0 = bitcast <2 x i64> %__a to <4 x i32> 1768 %load = load i32, i32* %__b 1769 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1770 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1771 %2 = icmp eq <4 x i32> %0, %1 1772 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1773 %4 = bitcast <64 x i1> %3 to i64 1774 ret i64 %4 1775} 1776 1777define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 1778; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1779; VLX: # %bb.0: # %entry 1780; VLX-NEXT: kmovd %edi, %k1 1781; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1782; VLX-NEXT: kmovq %k0, %rax 1783; VLX-NEXT: retq 1784; 1785; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1786; NoVLX: # %bb.0: # %entry 1787; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1788; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 1789; NoVLX-NEXT: 
kmovw %edi, %k1 1790; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1791; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1792; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1793; NoVLX-NEXT: kmovw %k0, %eax 1794; NoVLX-NEXT: movzwl %ax, %eax 1795; NoVLX-NEXT: vzeroupper 1796; NoVLX-NEXT: retq 1797entry: 1798 %0 = bitcast <2 x i64> %__a to <4 x i32> 1799 %load = load i32, i32* %__b 1800 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1801 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1802 %2 = icmp eq <4 x i32> %0, %1 1803 %3 = bitcast i8 %__u to <8 x i1> 1804 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1805 %4 = and <4 x i1> %extract.i, %2 1806 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1807 %6 = bitcast <64 x i1> %5 to i64 1808 ret i64 %6 1809} 1810 1811 1812define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 1813; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: 1814; VLX: # %bb.0: # %entry 1815; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 1816; VLX-NEXT: kmovd %k0, %eax 1817; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1818; VLX-NEXT: vzeroupper 1819; VLX-NEXT: retq 1820; 1821; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: 1822; NoVLX: # %bb.0: # %entry 1823; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1824; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1825; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1826; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1827; NoVLX-NEXT: kshiftrw 
$8, %k0, %k0 1828; NoVLX-NEXT: kmovw %k0, %eax 1829; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1830; NoVLX-NEXT: vzeroupper 1831; NoVLX-NEXT: retq 1832entry: 1833 %0 = bitcast <4 x i64> %__a to <8 x i32> 1834 %1 = bitcast <4 x i64> %__b to <8 x i32> 1835 %2 = icmp eq <8 x i32> %0, %1 1836 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1837 %4 = bitcast <16 x i1> %3 to i16 1838 ret i16 %4 1839} 1840 1841define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 1842; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: 1843; VLX: # %bb.0: # %entry 1844; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 1845; VLX-NEXT: kmovd %k0, %eax 1846; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1847; VLX-NEXT: vzeroupper 1848; VLX-NEXT: retq 1849; 1850; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: 1851; NoVLX: # %bb.0: # %entry 1852; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1853; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 1854; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1855; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1856; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1857; NoVLX-NEXT: kmovw %k0, %eax 1858; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1859; NoVLX-NEXT: vzeroupper 1860; NoVLX-NEXT: retq 1861entry: 1862 %0 = bitcast <4 x i64> %__a to <8 x i32> 1863 %load = load <4 x i64>, <4 x i64>* %__b 1864 %1 = bitcast <4 x i64> %load to <8 x i32> 1865 %2 = icmp eq <8 x i32> %0, %1 1866 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1867 %4 = bitcast <16 x i1> %3 to i16 1868 ret i16 %4 1869} 1870 1871define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 1872; VLX-LABEL: 
test_masked_vpcmpeqd_v8i1_v16i1_mask: 1873; VLX: # %bb.0: # %entry 1874; VLX-NEXT: kmovd %edi, %k1 1875; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 1876; VLX-NEXT: kmovd %k0, %eax 1877; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1878; VLX-NEXT: vzeroupper 1879; VLX-NEXT: retq 1880; 1881; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: 1882; NoVLX: # %bb.0: # %entry 1883; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1884; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1885; NoVLX-NEXT: kmovw %edi, %k1 1886; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1887; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1888; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1889; NoVLX-NEXT: kmovw %k0, %eax 1890; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1891; NoVLX-NEXT: vzeroupper 1892; NoVLX-NEXT: retq 1893entry: 1894 %0 = bitcast <4 x i64> %__a to <8 x i32> 1895 %1 = bitcast <4 x i64> %__b to <8 x i32> 1896 %2 = icmp eq <8 x i32> %0, %1 1897 %3 = bitcast i8 %__u to <8 x i1> 1898 %4 = and <8 x i1> %2, %3 1899 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1900 %6 = bitcast <16 x i1> %5 to i16 1901 ret i16 %6 1902} 1903 1904define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 1905; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: 1906; VLX: # %bb.0: # %entry 1907; VLX-NEXT: kmovd %edi, %k1 1908; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} 1909; VLX-NEXT: kmovd %k0, %eax 1910; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1911; VLX-NEXT: vzeroupper 1912; VLX-NEXT: retq 1913; 1914; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: 1915; NoVLX: # %bb.0: # %entry 1916; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1917; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 1918; NoVLX-NEXT: kmovw %edi, %k1 1919; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1920; 
NoVLX-NEXT: kshiftlw $8, %k0, %k0 1921; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1922; NoVLX-NEXT: kmovw %k0, %eax 1923; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1924; NoVLX-NEXT: vzeroupper 1925; NoVLX-NEXT: retq 1926entry: 1927 %0 = bitcast <4 x i64> %__a to <8 x i32> 1928 %load = load <4 x i64>, <4 x i64>* %__b 1929 %1 = bitcast <4 x i64> %load to <8 x i32> 1930 %2 = icmp eq <8 x i32> %0, %1 1931 %3 = bitcast i8 %__u to <8 x i1> 1932 %4 = and <8 x i1> %2, %3 1933 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1934 %6 = bitcast <16 x i1> %5 to i16 1935 ret i16 %6 1936} 1937 1938 1939define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 1940; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1941; VLX: # %bb.0: # %entry 1942; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 1943; VLX-NEXT: kmovd %k0, %eax 1944; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1945; VLX-NEXT: vzeroupper 1946; VLX-NEXT: retq 1947; 1948; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1949; NoVLX: # %bb.0: # %entry 1950; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1951; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 1952; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1953; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1954; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1955; NoVLX-NEXT: kmovw %k0, %eax 1956; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1957; NoVLX-NEXT: vzeroupper 1958; NoVLX-NEXT: retq 1959entry: 1960 %0 = bitcast <4 x i64> %__a to <8 x i32> 1961 %load = load i32, i32* %__b 1962 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 1963 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1964 %2 = icmp eq <8 x i32> %0, %1 1965 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, 
i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1966 %4 = bitcast <16 x i1> %3 to i16 1967 ret i16 %4 1968} 1969 1970define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 1971; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1972; VLX: # %bb.0: # %entry 1973; VLX-NEXT: kmovd %edi, %k1 1974; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 1975; VLX-NEXT: kmovd %k0, %eax 1976; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1977; VLX-NEXT: vzeroupper 1978; VLX-NEXT: retq 1979; 1980; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1981; NoVLX: # %bb.0: # %entry 1982; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1983; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 1984; NoVLX-NEXT: kmovw %edi, %k1 1985; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1986; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1987; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1988; NoVLX-NEXT: kmovw %k0, %eax 1989; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1990; NoVLX-NEXT: vzeroupper 1991; NoVLX-NEXT: retq 1992entry: 1993 %0 = bitcast <4 x i64> %__a to <8 x i32> 1994 %load = load i32, i32* %__b 1995 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 1996 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1997 %2 = icmp eq <8 x i32> %0, %1 1998 %3 = bitcast i8 %__u to <8 x i1> 1999 %4 = and <8 x i1> %3, %2 2000 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2001 %6 = bitcast <16 x i1> %5 to i16 2002 ret i16 %6 2003} 2004 2005 2006define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2007; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: 2008; VLX: # %bb.0: # %entry 2009; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 2010; VLX-NEXT: kmovd %k0, %eax 
2011; VLX-NEXT: vzeroupper 2012; VLX-NEXT: retq 2013; 2014; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: 2015; NoVLX: # %bb.0: # %entry 2016; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2017; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2018; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2019; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2020; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2021; NoVLX-NEXT: kmovw %k0, %eax 2022; NoVLX-NEXT: vzeroupper 2023; NoVLX-NEXT: retq 2024entry: 2025 %0 = bitcast <4 x i64> %__a to <8 x i32> 2026 %1 = bitcast <4 x i64> %__b to <8 x i32> 2027 %2 = icmp eq <8 x i32> %0, %1 2028 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2029 %4 = bitcast <32 x i1> %3 to i32 2030 ret i32 %4 2031} 2032 2033define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 2034; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: 2035; VLX: # %bb.0: # %entry 2036; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 2037; VLX-NEXT: kmovd %k0, %eax 2038; VLX-NEXT: vzeroupper 2039; VLX-NEXT: retq 2040; 2041; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: 2042; NoVLX: # %bb.0: # %entry 2043; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2044; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 2045; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2046; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2047; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2048; NoVLX-NEXT: kmovw %k0, %eax 2049; NoVLX-NEXT: vzeroupper 2050; NoVLX-NEXT: retq 2051entry: 2052 %0 = bitcast <4 x i64> %__a to <8 x i32> 2053 %load = load <4 x i64>, <4 x i64>* %__b 2054 %1 = bitcast <4 x i64> %load to <8 x i32> 2055 %2 = icmp eq <8 x i32> %0, %1 2056 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2057 %4 = bitcast <32 x i1> %3 to i32 2058 ret i32 %4 2059} 2060 2061define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2062; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: 2063; VLX: # %bb.0: # %entry 2064; VLX-NEXT: kmovd %edi, %k1 2065; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 2066; VLX-NEXT: kmovd %k0, %eax 2067; VLX-NEXT: vzeroupper 2068; VLX-NEXT: retq 2069; 2070; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: 2071; NoVLX: # %bb.0: # %entry 2072; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2073; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2074; NoVLX-NEXT: kmovw %edi, %k1 2075; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2076; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2077; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2078; NoVLX-NEXT: kmovw %k0, %eax 2079; NoVLX-NEXT: vzeroupper 2080; NoVLX-NEXT: retq 2081entry: 2082 %0 = bitcast <4 x i64> %__a to <8 x i32> 2083 %1 = bitcast <4 x i64> %__b to <8 x i32> 2084 %2 = icmp eq <8 x i32> %0, %1 2085 %3 = bitcast i8 %__u to <8 x i1> 2086 %4 = and <8 x i1> %2, %3 2087 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2088 %6 = bitcast <32 x i1> %5 to i32 2089 ret i32 %6 2090} 2091 2092define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 2093; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: 2094; VLX: # %bb.0: # %entry 2095; VLX-NEXT: kmovd %edi, %k1 2096; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 
{%k1} 2097; VLX-NEXT: kmovd %k0, %eax 2098; VLX-NEXT: vzeroupper 2099; VLX-NEXT: retq 2100; 2101; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: 2102; NoVLX: # %bb.0: # %entry 2103; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2104; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 2105; NoVLX-NEXT: kmovw %edi, %k1 2106; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2107; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2108; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2109; NoVLX-NEXT: kmovw %k0, %eax 2110; NoVLX-NEXT: vzeroupper 2111; NoVLX-NEXT: retq 2112entry: 2113 %0 = bitcast <4 x i64> %__a to <8 x i32> 2114 %load = load <4 x i64>, <4 x i64>* %__b 2115 %1 = bitcast <4 x i64> %load to <8 x i32> 2116 %2 = icmp eq <8 x i32> %0, %1 2117 %3 = bitcast i8 %__u to <8 x i1> 2118 %4 = and <8 x i1> %2, %3 2119 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2120 %6 = bitcast <32 x i1> %5 to i32 2121 ret i32 %6 2122} 2123 2124 2125define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 2126; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2127; VLX: # %bb.0: # %entry 2128; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 2129; VLX-NEXT: kmovd %k0, %eax 2130; VLX-NEXT: vzeroupper 2131; VLX-NEXT: retq 2132; 2133; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2134; NoVLX: # %bb.0: # %entry 2135; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2136; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 2137; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2138; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2139; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2140; NoVLX-NEXT: kmovw %k0, %eax 2141; NoVLX-NEXT: vzeroupper 2142; NoVLX-NEXT: retq 2143entry: 2144 %0 = bitcast <4 x i64> %__a to <8 x i32> 2145 %load = load i32, i32* %__b 2146 
%vec = insertelement <8 x i32> undef, i32 %load, i32 0 2147 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2148 %2 = icmp eq <8 x i32> %0, %1 2149 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2150 %4 = bitcast <32 x i1> %3 to i32 2151 ret i32 %4 2152} 2153 2154define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 2155; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2156; VLX: # %bb.0: # %entry 2157; VLX-NEXT: kmovd %edi, %k1 2158; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 2159; VLX-NEXT: kmovd %k0, %eax 2160; VLX-NEXT: vzeroupper 2161; VLX-NEXT: retq 2162; 2163; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2164; NoVLX: # %bb.0: # %entry 2165; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2166; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 2167; NoVLX-NEXT: kmovw %edi, %k1 2168; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2169; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2170; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2171; NoVLX-NEXT: kmovw %k0, %eax 2172; NoVLX-NEXT: vzeroupper 2173; NoVLX-NEXT: retq 2174entry: 2175 %0 = bitcast <4 x i64> %__a to <8 x i32> 2176 %load = load i32, i32* %__b 2177 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2178 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2179 %2 = icmp eq <8 x i32> %0, %1 2180 %3 = bitcast i8 %__u to <8 x i1> 2181 %4 = and <8 x i1> %3, %2 2182 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 
14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2183 %6 = bitcast <32 x i1> %5 to i32 2184 ret i32 %6 2185} 2186 2187 2188define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2189; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: 2190; VLX: # %bb.0: # %entry 2191; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 2192; VLX-NEXT: kmovq %k0, %rax 2193; VLX-NEXT: vzeroupper 2194; VLX-NEXT: retq 2195; 2196; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: 2197; NoVLX: # %bb.0: # %entry 2198; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2199; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2200; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2201; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2202; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2203; NoVLX-NEXT: kmovw %k0, %eax 2204; NoVLX-NEXT: movzwl %ax, %eax 2205; NoVLX-NEXT: vzeroupper 2206; NoVLX-NEXT: retq 2207entry: 2208 %0 = bitcast <4 x i64> %__a to <8 x i32> 2209 %1 = bitcast <4 x i64> %__b to <8 x i32> 2210 %2 = icmp eq <8 x i32> %0, %1 2211 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2212 %4 = bitcast <64 x i1> %3 to i64 2213 ret i64 %4 2214} 2215 2216define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 2217; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: 2218; VLX: # %bb.0: # %entry 2219; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 2220; VLX-NEXT: kmovq %k0, 
%rax 2221; VLX-NEXT: vzeroupper 2222; VLX-NEXT: retq 2223; 2224; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: 2225; NoVLX: # %bb.0: # %entry 2226; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2227; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 2228; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2229; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2230; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2231; NoVLX-NEXT: kmovw %k0, %eax 2232; NoVLX-NEXT: movzwl %ax, %eax 2233; NoVLX-NEXT: vzeroupper 2234; NoVLX-NEXT: retq 2235entry: 2236 %0 = bitcast <4 x i64> %__a to <8 x i32> 2237 %load = load <4 x i64>, <4 x i64>* %__b 2238 %1 = bitcast <4 x i64> %load to <8 x i32> 2239 %2 = icmp eq <8 x i32> %0, %1 2240 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2241 %4 = bitcast <64 x i1> %3 to i64 2242 ret i64 %4 2243} 2244 2245define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2246; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: 2247; VLX: # %bb.0: # %entry 2248; VLX-NEXT: kmovd %edi, %k1 2249; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 2250; VLX-NEXT: kmovq %k0, %rax 2251; VLX-NEXT: vzeroupper 2252; VLX-NEXT: retq 2253; 2254; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: 2255; NoVLX: # %bb.0: # %entry 2256; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2257; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2258; NoVLX-NEXT: kmovw %edi, %k1 2259; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2260; NoVLX-NEXT: 
kshiftlw $8, %k0, %k0 2261; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2262; NoVLX-NEXT: kmovw %k0, %eax 2263; NoVLX-NEXT: movzwl %ax, %eax 2264; NoVLX-NEXT: vzeroupper 2265; NoVLX-NEXT: retq 2266entry: 2267 %0 = bitcast <4 x i64> %__a to <8 x i32> 2268 %1 = bitcast <4 x i64> %__b to <8 x i32> 2269 %2 = icmp eq <8 x i32> %0, %1 2270 %3 = bitcast i8 %__u to <8 x i1> 2271 %4 = and <8 x i1> %2, %3 2272 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2273 %6 = bitcast <64 x i1> %5 to i64 2274 ret i64 %6 2275} 2276 2277define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 2278; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: 2279; VLX: # %bb.0: # %entry 2280; VLX-NEXT: kmovd %edi, %k1 2281; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} 2282; VLX-NEXT: kmovq %k0, %rax 2283; VLX-NEXT: vzeroupper 2284; VLX-NEXT: retq 2285; 2286; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: 2287; NoVLX: # %bb.0: # %entry 2288; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2289; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 2290; NoVLX-NEXT: kmovw %edi, %k1 2291; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2292; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2293; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2294; NoVLX-NEXT: kmovw %k0, %eax 2295; NoVLX-NEXT: movzwl %ax, %eax 2296; NoVLX-NEXT: vzeroupper 2297; NoVLX-NEXT: retq 2298entry: 2299 %0 = bitcast <4 x i64> %__a to <8 x i32> 2300 %load = load <4 x i64>, <4 x i64>* %__b 2301 %1 = 
bitcast <4 x i64> %load to <8 x i32> 2302 %2 = icmp eq <8 x i32> %0, %1 2303 %3 = bitcast i8 %__u to <8 x i1> 2304 %4 = and <8 x i1> %2, %3 2305 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2306 %6 = bitcast <64 x i1> %5 to i64 2307 ret i64 %6 2308} 2309 2310 2311define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 2312; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2313; VLX: # %bb.0: # %entry 2314; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 2315; VLX-NEXT: kmovq %k0, %rax 2316; VLX-NEXT: vzeroupper 2317; VLX-NEXT: retq 2318; 2319; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2320; NoVLX: # %bb.0: # %entry 2321; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2322; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 2323; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2324; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2325; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2326; NoVLX-NEXT: kmovw %k0, %eax 2327; NoVLX-NEXT: movzwl %ax, %eax 2328; NoVLX-NEXT: vzeroupper 2329; NoVLX-NEXT: retq 2330entry: 2331 %0 = bitcast <4 x i64> %__a to <8 x i32> 2332 %load = load i32, i32* %__b 2333 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2334 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2335 %2 = icmp eq <8 x i32> %0, %1 2336 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 
10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2337 %4 = bitcast <64 x i1> %3 to i64 2338 ret i64 %4 2339} 2340 2341define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 2342; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2343; VLX: # %bb.0: # %entry 2344; VLX-NEXT: kmovd %edi, %k1 2345; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 2346; VLX-NEXT: kmovq %k0, %rax 2347; VLX-NEXT: vzeroupper 2348; VLX-NEXT: retq 2349; 2350; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2351; NoVLX: # %bb.0: # %entry 2352; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2353; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 2354; NoVLX-NEXT: kmovw %edi, %k1 2355; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2356; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2357; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2358; NoVLX-NEXT: kmovw %k0, %eax 2359; NoVLX-NEXT: movzwl %ax, %eax 2360; NoVLX-NEXT: vzeroupper 2361; NoVLX-NEXT: retq 2362entry: 2363 %0 = bitcast <4 x i64> %__a to <8 x i32> 2364 %load = load i32, i32* %__b 2365 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2366 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2367 %2 = icmp eq <8 x i32> %0, %1 2368 %3 = bitcast i8 %__u to <8 x i1> 2369 %4 = and <8 x i1> %3, %2 2370 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, 
i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2371 %6 = bitcast <64 x i1> %5 to i64 2372 ret i64 %6 2373} 2374 2375 2376define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2377; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: 2378; VLX: # %bb.0: # %entry 2379; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2380; VLX-NEXT: kmovd %k0, %eax 2381; VLX-NEXT: vzeroupper 2382; VLX-NEXT: retq 2383; 2384; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: 2385; NoVLX: # %bb.0: # %entry 2386; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2387; NoVLX-NEXT: kmovw %k0, %eax 2388; NoVLX-NEXT: vzeroupper 2389; NoVLX-NEXT: retq 2390entry: 2391 %0 = bitcast <8 x i64> %__a to <16 x i32> 2392 %1 = bitcast <8 x i64> %__b to <16 x i32> 2393 %2 = icmp eq <16 x i32> %0, %1 2394 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2395 %4 = bitcast <32 x i1> %3 to i32 2396 ret i32 %4 2397} 2398 2399define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 2400; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: 2401; VLX: # %bb.0: # %entry 2402; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2403; VLX-NEXT: kmovd %k0, %eax 2404; VLX-NEXT: vzeroupper 2405; VLX-NEXT: retq 2406; 2407; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: 2408; NoVLX: # %bb.0: # %entry 2409; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2410; NoVLX-NEXT: kmovw %k0, %eax 2411; NoVLX-NEXT: vzeroupper 2412; NoVLX-NEXT: retq 2413entry: 2414 %0 = 
bitcast <8 x i64> %__a to <16 x i32> 2415 %load = load <8 x i64>, <8 x i64>* %__b 2416 %1 = bitcast <8 x i64> %load to <16 x i32> 2417 %2 = icmp eq <16 x i32> %0, %1 2418 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2419 %4 = bitcast <32 x i1> %3 to i32 2420 ret i32 %4 2421} 2422 2423define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2424; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: 2425; VLX: # %bb.0: # %entry 2426; VLX-NEXT: kmovd %edi, %k1 2427; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2428; VLX-NEXT: kmovd %k0, %eax 2429; VLX-NEXT: vzeroupper 2430; VLX-NEXT: retq 2431; 2432; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: 2433; NoVLX: # %bb.0: # %entry 2434; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2435; NoVLX-NEXT: kmovw %k0, %eax 2436; NoVLX-NEXT: andl %edi, %eax 2437; NoVLX-NEXT: vzeroupper 2438; NoVLX-NEXT: retq 2439entry: 2440 %0 = bitcast <8 x i64> %__a to <16 x i32> 2441 %1 = bitcast <8 x i64> %__b to <16 x i32> 2442 %2 = icmp eq <16 x i32> %0, %1 2443 %3 = bitcast i16 %__u to <16 x i1> 2444 %4 = and <16 x i1> %2, %3 2445 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2446 %6 = bitcast <32 x i1> %5 to i32 2447 ret i32 %6 2448} 2449 2450define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 2451; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: 2452; 
VLX: # %bb.0: # %entry 2453; VLX-NEXT: kmovd %edi, %k1 2454; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} 2455; VLX-NEXT: kmovd %k0, %eax 2456; VLX-NEXT: vzeroupper 2457; VLX-NEXT: retq 2458; 2459; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: 2460; NoVLX: # %bb.0: # %entry 2461; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 2462; NoVLX-NEXT: kmovw %k0, %eax 2463; NoVLX-NEXT: andl %edi, %eax 2464; NoVLX-NEXT: vzeroupper 2465; NoVLX-NEXT: retq 2466entry: 2467 %0 = bitcast <8 x i64> %__a to <16 x i32> 2468 %load = load <8 x i64>, <8 x i64>* %__b 2469 %1 = bitcast <8 x i64> %load to <16 x i32> 2470 %2 = icmp eq <16 x i32> %0, %1 2471 %3 = bitcast i16 %__u to <16 x i1> 2472 %4 = and <16 x i1> %2, %3 2473 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2474 %6 = bitcast <32 x i1> %5 to i32 2475 ret i32 %6 2476} 2477 2478 2479define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 2480; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2481; VLX: # %bb.0: # %entry 2482; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2483; VLX-NEXT: kmovd %k0, %eax 2484; VLX-NEXT: vzeroupper 2485; VLX-NEXT: retq 2486; 2487; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2488; NoVLX: # %bb.0: # %entry 2489; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2490; NoVLX-NEXT: kmovw %k0, %eax 2491; NoVLX-NEXT: vzeroupper 2492; NoVLX-NEXT: retq 2493entry: 2494 %0 = bitcast <8 x i64> %__a to <16 x i32> 2495 %load = load i32, i32* %__b 2496 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2497 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2498 
%2 = icmp eq <16 x i32> %0, %1 2499 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2500 %4 = bitcast <32 x i1> %3 to i32 2501 ret i32 %4 2502} 2503 2504define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 2505; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2506; VLX: # %bb.0: # %entry 2507; VLX-NEXT: kmovd %edi, %k1 2508; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2509; VLX-NEXT: kmovd %k0, %eax 2510; VLX-NEXT: vzeroupper 2511; VLX-NEXT: retq 2512; 2513; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2514; NoVLX: # %bb.0: # %entry 2515; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 2516; NoVLX-NEXT: kmovw %k0, %eax 2517; NoVLX-NEXT: andl %edi, %eax 2518; NoVLX-NEXT: vzeroupper 2519; NoVLX-NEXT: retq 2520entry: 2521 %0 = bitcast <8 x i64> %__a to <16 x i32> 2522 %load = load i32, i32* %__b 2523 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2524 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2525 %2 = icmp eq <16 x i32> %0, %1 2526 %3 = bitcast i16 %__u to <16 x i1> 2527 %4 = and <16 x i1> %3, %2 2528 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2529 %6 = bitcast <32 x i1> %5 to i32 2530 ret i32 %6 2531} 2532 2533 2534define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x 
i64> %__b) local_unnamed_addr { 2535; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: 2536; VLX: # %bb.0: # %entry 2537; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2538; VLX-NEXT: kmovq %k0, %rax 2539; VLX-NEXT: vzeroupper 2540; VLX-NEXT: retq 2541; 2542; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: 2543; NoVLX: # %bb.0: # %entry 2544; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2545; NoVLX-NEXT: kmovw %k0, %eax 2546; NoVLX-NEXT: movzwl %ax, %eax 2547; NoVLX-NEXT: vzeroupper 2548; NoVLX-NEXT: retq 2549entry: 2550 %0 = bitcast <8 x i64> %__a to <16 x i32> 2551 %1 = bitcast <8 x i64> %__b to <16 x i32> 2552 %2 = icmp eq <16 x i32> %0, %1 2553 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2554 %4 = bitcast <64 x i1> %3 to i64 2555 ret i64 %4 2556} 2557 2558define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 2559; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: 2560; VLX: # %bb.0: # %entry 2561; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2562; VLX-NEXT: kmovq %k0, %rax 2563; VLX-NEXT: vzeroupper 2564; VLX-NEXT: retq 2565; 2566; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: 2567; NoVLX: # %bb.0: # %entry 2568; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2569; NoVLX-NEXT: kmovw %k0, %eax 2570; NoVLX-NEXT: movzwl %ax, %eax 2571; NoVLX-NEXT: vzeroupper 2572; NoVLX-NEXT: retq 2573entry: 2574 %0 = bitcast <8 x i64> %__a to <16 x i32> 2575 %load = load <8 x i64>, <8 x i64>* %__b 2576 %1 = bitcast <8 x i64> %load to <16 x i32> 2577 %2 = icmp eq <16 x i32> %0, %1 2578 %3 = shufflevector 
<16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2579 %4 = bitcast <64 x i1> %3 to i64 2580 ret i64 %4 2581} 2582 2583define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2584; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: 2585; VLX: # %bb.0: # %entry 2586; VLX-NEXT: kmovd %edi, %k1 2587; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2588; VLX-NEXT: kmovq %k0, %rax 2589; VLX-NEXT: vzeroupper 2590; VLX-NEXT: retq 2591; 2592; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: 2593; NoVLX: # %bb.0: # %entry 2594; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2595; NoVLX-NEXT: kmovw %k0, %eax 2596; NoVLX-NEXT: andl %edi, %eax 2597; NoVLX-NEXT: vzeroupper 2598; NoVLX-NEXT: retq 2599entry: 2600 %0 = bitcast <8 x i64> %__a to <16 x i32> 2601 %1 = bitcast <8 x i64> %__b to <16 x i32> 2602 %2 = icmp eq <16 x i32> %0, %1 2603 %3 = bitcast i16 %__u to <16 x i1> 2604 %4 = and <16 x i1> %2, %3 2605 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2606 %6 = bitcast <64 x i1> %5 to i64 2607 ret i64 %6 2608} 2609 2610define 
zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 2611; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: 2612; VLX: # %bb.0: # %entry 2613; VLX-NEXT: kmovd %edi, %k1 2614; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} 2615; VLX-NEXT: kmovq %k0, %rax 2616; VLX-NEXT: vzeroupper 2617; VLX-NEXT: retq 2618; 2619; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: 2620; NoVLX: # %bb.0: # %entry 2621; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 2622; NoVLX-NEXT: kmovw %k0, %eax 2623; NoVLX-NEXT: andl %edi, %eax 2624; NoVLX-NEXT: vzeroupper 2625; NoVLX-NEXT: retq 2626entry: 2627 %0 = bitcast <8 x i64> %__a to <16 x i32> 2628 %load = load <8 x i64>, <8 x i64>* %__b 2629 %1 = bitcast <8 x i64> %load to <16 x i32> 2630 %2 = icmp eq <16 x i32> %0, %1 2631 %3 = bitcast i16 %__u to <16 x i1> 2632 %4 = and <16 x i1> %2, %3 2633 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2634 %6 = bitcast <64 x i1> %5 to i64 2635 ret i64 %6 2636} 2637 2638 2639define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 2640; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2641; VLX: # %bb.0: # %entry 2642; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2643; VLX-NEXT: kmovq %k0, %rax 2644; VLX-NEXT: vzeroupper 2645; VLX-NEXT: retq 2646; 2647; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2648; NoVLX: # %bb.0: # %entry 2649; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2650; NoVLX-NEXT: kmovw %k0, %eax 2651; 
NoVLX-NEXT: movzwl %ax, %eax 2652; NoVLX-NEXT: vzeroupper 2653; NoVLX-NEXT: retq 2654entry: 2655 %0 = bitcast <8 x i64> %__a to <16 x i32> 2656 %load = load i32, i32* %__b 2657 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2658 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2659 %2 = icmp eq <16 x i32> %0, %1 2660 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2661 %4 = bitcast <64 x i1> %3 to i64 2662 ret i64 %4 2663} 2664 2665define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 2666; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2667; VLX: # %bb.0: # %entry 2668; VLX-NEXT: kmovd %edi, %k1 2669; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2670; VLX-NEXT: kmovq %k0, %rax 2671; VLX-NEXT: vzeroupper 2672; VLX-NEXT: retq 2673; 2674; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2675; NoVLX: # %bb.0: # %entry 2676; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 2677; NoVLX-NEXT: kmovw %k0, %eax 2678; NoVLX-NEXT: andl %edi, %eax 2679; NoVLX-NEXT: vzeroupper 2680; NoVLX-NEXT: retq 2681entry: 2682 %0 = bitcast <8 x i64> %__a to <16 x i32> 2683 %load = load i32, i32* %__b 2684 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2685 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0> 2686 %2 = icmp eq <16 x i32> %0, %1 2687 %3 = bitcast i16 %__u to <16 x i1> 2688 %4 = and <16 x i1> %3, %2 2689 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2690 %6 = bitcast <64 x i1> %5 to i64 2691 ret i64 %6 2692} 2693 2694 2695define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2696; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: 2697; VLX: # %bb.0: # %entry 2698; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 2699; VLX-NEXT: kmovb %k0, %eax 2700; VLX-NEXT: retq 2701; 2702; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: 2703; NoVLX: # %bb.0: # %entry 2704; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2705; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2706; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2707; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2708; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2709; NoVLX-NEXT: kmovw %k0, %eax 2710; NoVLX-NEXT: andl $3, %eax 2711; NoVLX-NEXT: vzeroupper 2712; NoVLX-NEXT: retq 2713entry: 2714 %0 = bitcast <2 x i64> %__a to <2 x i64> 2715 %1 = bitcast <2 x i64> %__b to <2 x i64> 2716 %2 = icmp eq <2 x i64> %0, %1 2717 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2718 %4 = bitcast <4 x i1> %3 to i4 2719 ret i4 %4 2720} 2721 2722define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 2723; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: 2724; VLX: # %bb.0: # %entry 2725; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 2726; 
VLX-NEXT: kmovb %k0, %eax 2727; VLX-NEXT: retq 2728; 2729; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: 2730; NoVLX: # %bb.0: # %entry 2731; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2732; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 2733; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2734; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2735; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2736; NoVLX-NEXT: kmovw %k0, %eax 2737; NoVLX-NEXT: andl $3, %eax 2738; NoVLX-NEXT: vzeroupper 2739; NoVLX-NEXT: retq 2740entry: 2741 %0 = bitcast <2 x i64> %__a to <2 x i64> 2742 %load = load <2 x i64>, <2 x i64>* %__b 2743 %1 = bitcast <2 x i64> %load to <2 x i64> 2744 %2 = icmp eq <2 x i64> %0, %1 2745 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2746 %4 = bitcast <4 x i1> %3 to i4 2747 ret i4 %4 2748} 2749 2750define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2751; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: 2752; VLX: # %bb.0: # %entry 2753; VLX-NEXT: kmovd %edi, %k1 2754; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 2755; VLX-NEXT: kmovb %k0, %eax 2756; VLX-NEXT: retq 2757; 2758; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: 2759; NoVLX: # %bb.0: # %entry 2760; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2761; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2762; NoVLX-NEXT: kmovw %edi, %k1 2763; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2764; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2765; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2766; NoVLX-NEXT: kmovw %k0, %eax 2767; NoVLX-NEXT: andl $3, %eax 2768; NoVLX-NEXT: vzeroupper 2769; NoVLX-NEXT: retq 2770entry: 2771 %0 = bitcast <2 x i64> %__a to <2 x i64> 2772 %1 = bitcast <2 x i64> %__b to <2 x i64> 2773 %2 = icmp eq <2 x i64> %0, %1 2774 %3 = bitcast i8 %__u to <8 x i1> 2775 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2776 %4 = and <2 x i1> %2, %extract.i 2777 %5 = shufflevector <2 x 
i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2778 %6 = bitcast <4 x i1> %5 to i4 2779 ret i4 %6 2780} 2781 2782define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 2783; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: 2784; VLX: # %bb.0: # %entry 2785; VLX-NEXT: kmovd %edi, %k1 2786; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 2787; VLX-NEXT: kmovb %k0, %eax 2788; VLX-NEXT: retq 2789; 2790; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: 2791; NoVLX: # %bb.0: # %entry 2792; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2793; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 2794; NoVLX-NEXT: kmovw %edi, %k1 2795; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2796; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2797; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2798; NoVLX-NEXT: kmovw %k0, %eax 2799; NoVLX-NEXT: andl $3, %eax 2800; NoVLX-NEXT: vzeroupper 2801; NoVLX-NEXT: retq 2802entry: 2803 %0 = bitcast <2 x i64> %__a to <2 x i64> 2804 %load = load <2 x i64>, <2 x i64>* %__b 2805 %1 = bitcast <2 x i64> %load to <2 x i64> 2806 %2 = icmp eq <2 x i64> %0, %1 2807 %3 = bitcast i8 %__u to <8 x i1> 2808 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2809 %4 = and <2 x i1> %2, %extract.i 2810 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2811 %6 = bitcast <4 x i1> %5 to i4 2812 ret i4 %6 2813} 2814 2815 2816define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 2817; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2818; VLX: # %bb.0: # %entry 2819; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 2820; VLX-NEXT: kmovb %k0, %eax 2821; VLX-NEXT: retq 2822; 2823; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2824; NoVLX: # %bb.0: # %entry 2825; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2826; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 2827; NoVLX-NEXT: 
vpcmpeqq %zmm1, %zmm0, %k0 2828; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2829; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2830; NoVLX-NEXT: kmovw %k0, %eax 2831; NoVLX-NEXT: andl $3, %eax 2832; NoVLX-NEXT: vzeroupper 2833; NoVLX-NEXT: retq 2834entry: 2835 %0 = bitcast <2 x i64> %__a to <2 x i64> 2836 %load = load i64, i64* %__b 2837 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 2838 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2839 %2 = icmp eq <2 x i64> %0, %1 2840 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2841 %4 = bitcast <4 x i1> %3 to i4 2842 ret i4 %4 2843} 2844 2845define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 2846; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2847; VLX: # %bb.0: # %entry 2848; VLX-NEXT: kmovd %edi, %k1 2849; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 2850; VLX-NEXT: kmovb %k0, %eax 2851; VLX-NEXT: retq 2852; 2853; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2854; NoVLX: # %bb.0: # %entry 2855; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2856; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 2857; NoVLX-NEXT: kmovw %edi, %k1 2858; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2859; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2860; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2861; NoVLX-NEXT: kmovw %k0, %eax 2862; NoVLX-NEXT: andl $3, %eax 2863; NoVLX-NEXT: vzeroupper 2864; NoVLX-NEXT: retq 2865entry: 2866 %0 = bitcast <2 x i64> %__a to <2 x i64> 2867 %load = load i64, i64* %__b 2868 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 2869 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2870 %2 = icmp eq <2 x i64> %0, %1 2871 %3 = bitcast i8 %__u to <8 x i1> 2872 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2873 %4 = and <2 x i1> %extract.i, %2 2874 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, 
<4 x i32> <i32 0, i32 1, i32 2, i32 3> 2875 %6 = bitcast <4 x i1> %5 to i4 2876 ret i4 %6 2877} 2878 2879 2880define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2881; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: 2882; VLX: # %bb.0: # %entry 2883; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 2884; VLX-NEXT: kmovd %k0, %eax 2885; VLX-NEXT: # kill: def $al killed $al killed $eax 2886; VLX-NEXT: retq 2887; 2888; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: 2889; NoVLX: # %bb.0: # %entry 2890; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2891; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2892; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2893; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2894; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2895; NoVLX-NEXT: kmovw %k0, %eax 2896; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2897; NoVLX-NEXT: vzeroupper 2898; NoVLX-NEXT: retq 2899entry: 2900 %0 = bitcast <2 x i64> %__a to <2 x i64> 2901 %1 = bitcast <2 x i64> %__b to <2 x i64> 2902 %2 = icmp eq <2 x i64> %0, %1 2903 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2904 %4 = bitcast <8 x i1> %3 to i8 2905 ret i8 %4 2906} 2907 2908define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 2909; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: 2910; VLX: # %bb.0: # %entry 2911; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 2912; VLX-NEXT: kmovd %k0, %eax 2913; VLX-NEXT: # kill: def $al killed $al killed $eax 2914; VLX-NEXT: retq 2915; 2916; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: 2917; NoVLX: # %bb.0: # %entry 2918; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2919; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 2920; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2921; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2922; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2923; NoVLX-NEXT: kmovw %k0, %eax 2924; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2925; 
NoVLX-NEXT: vzeroupper 2926; NoVLX-NEXT: retq 2927entry: 2928 %0 = bitcast <2 x i64> %__a to <2 x i64> 2929 %load = load <2 x i64>, <2 x i64>* %__b 2930 %1 = bitcast <2 x i64> %load to <2 x i64> 2931 %2 = icmp eq <2 x i64> %0, %1 2932 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2933 %4 = bitcast <8 x i1> %3 to i8 2934 ret i8 %4 2935} 2936 2937define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2938; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: 2939; VLX: # %bb.0: # %entry 2940; VLX-NEXT: kmovd %edi, %k1 2941; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 2942; VLX-NEXT: kmovd %k0, %eax 2943; VLX-NEXT: # kill: def $al killed $al killed $eax 2944; VLX-NEXT: retq 2945; 2946; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: 2947; NoVLX: # %bb.0: # %entry 2948; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2949; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2950; NoVLX-NEXT: kmovw %edi, %k1 2951; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2952; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2953; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2954; NoVLX-NEXT: kmovw %k0, %eax 2955; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2956; NoVLX-NEXT: vzeroupper 2957; NoVLX-NEXT: retq 2958entry: 2959 %0 = bitcast <2 x i64> %__a to <2 x i64> 2960 %1 = bitcast <2 x i64> %__b to <2 x i64> 2961 %2 = icmp eq <2 x i64> %0, %1 2962 %3 = bitcast i8 %__u to <8 x i1> 2963 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2964 %4 = and <2 x i1> %2, %extract.i 2965 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2966 %6 = bitcast <8 x i1> %5 to i8 2967 ret i8 %6 2968} 2969 2970define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 2971; VLX-LABEL: 
test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: 2972; VLX: # %bb.0: # %entry 2973; VLX-NEXT: kmovd %edi, %k1 2974; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 2975; VLX-NEXT: kmovd %k0, %eax 2976; VLX-NEXT: # kill: def $al killed $al killed $eax 2977; VLX-NEXT: retq 2978; 2979; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: 2980; NoVLX: # %bb.0: # %entry 2981; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2982; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 2983; NoVLX-NEXT: kmovw %edi, %k1 2984; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2985; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2986; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2987; NoVLX-NEXT: kmovw %k0, %eax 2988; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2989; NoVLX-NEXT: vzeroupper 2990; NoVLX-NEXT: retq 2991entry: 2992 %0 = bitcast <2 x i64> %__a to <2 x i64> 2993 %load = load <2 x i64>, <2 x i64>* %__b 2994 %1 = bitcast <2 x i64> %load to <2 x i64> 2995 %2 = icmp eq <2 x i64> %0, %1 2996 %3 = bitcast i8 %__u to <8 x i1> 2997 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2998 %4 = and <2 x i1> %2, %extract.i 2999 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3000 %6 = bitcast <8 x i1> %5 to i8 3001 ret i8 %6 3002} 3003 3004 3005define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 3006; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3007; VLX: # %bb.0: # %entry 3008; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3009; VLX-NEXT: kmovd %k0, %eax 3010; VLX-NEXT: # kill: def $al killed $al killed $eax 3011; VLX-NEXT: retq 3012; 3013; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3014; NoVLX: # %bb.0: # %entry 3015; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3016; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 3017; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3018; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3019; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3020; NoVLX-NEXT: 
kmovw %k0, %eax 3021; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3022; NoVLX-NEXT: vzeroupper 3023; NoVLX-NEXT: retq 3024entry: 3025 %0 = bitcast <2 x i64> %__a to <2 x i64> 3026 %load = load i64, i64* %__b 3027 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3028 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3029 %2 = icmp eq <2 x i64> %0, %1 3030 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3031 %4 = bitcast <8 x i1> %3 to i8 3032 ret i8 %4 3033} 3034 3035define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 3036; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3037; VLX: # %bb.0: # %entry 3038; VLX-NEXT: kmovd %edi, %k1 3039; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3040; VLX-NEXT: kmovd %k0, %eax 3041; VLX-NEXT: # kill: def $al killed $al killed $eax 3042; VLX-NEXT: retq 3043; 3044; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3045; NoVLX: # %bb.0: # %entry 3046; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3047; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 3048; NoVLX-NEXT: kmovw %edi, %k1 3049; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3050; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3051; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3052; NoVLX-NEXT: kmovw %k0, %eax 3053; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3054; NoVLX-NEXT: vzeroupper 3055; NoVLX-NEXT: retq 3056entry: 3057 %0 = bitcast <2 x i64> %__a to <2 x i64> 3058 %load = load i64, i64* %__b 3059 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3060 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3061 %2 = icmp eq <2 x i64> %0, %1 3062 %3 = bitcast i8 %__u to <8 x i1> 3063 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3064 %4 = and <2 x i1> %extract.i, %2 3065 %5 = shufflevector <2 x i1> %4, <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3066 %6 = bitcast <8 x i1> %5 to i8 3067 ret i8 %6 3068} 3069 3070 3071define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3072; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: 3073; VLX: # %bb.0: # %entry 3074; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3075; VLX-NEXT: kmovd %k0, %eax 3076; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3077; VLX-NEXT: retq 3078; 3079; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: 3080; NoVLX: # %bb.0: # %entry 3081; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3082; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3083; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3084; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3085; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3086; NoVLX-NEXT: kmovw %k0, %eax 3087; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3088; NoVLX-NEXT: vzeroupper 3089; NoVLX-NEXT: retq 3090entry: 3091 %0 = bitcast <2 x i64> %__a to <2 x i64> 3092 %1 = bitcast <2 x i64> %__b to <2 x i64> 3093 %2 = icmp eq <2 x i64> %0, %1 3094 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3095 %4 = bitcast <16 x i1> %3 to i16 3096 ret i16 %4 3097} 3098 3099define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3100; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: 3101; VLX: # %bb.0: # %entry 3102; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 3103; VLX-NEXT: kmovd %k0, %eax 3104; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3105; VLX-NEXT: retq 3106; 3107; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: 3108; NoVLX: # %bb.0: # %entry 3109; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3110; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3111; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3112; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3113; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 3114; NoVLX-NEXT: kmovw %k0, %eax 3115; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3116; NoVLX-NEXT: vzeroupper 3117; NoVLX-NEXT: retq 3118entry: 3119 %0 = bitcast <2 x i64> %__a to <2 x i64> 3120 %load = load <2 x i64>, <2 x i64>* %__b 3121 %1 = bitcast <2 x i64> %load to <2 x i64> 3122 %2 = icmp eq <2 x i64> %0, %1 3123 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3124 %4 = bitcast <16 x i1> %3 to i16 3125 ret i16 %4 3126} 3127 3128define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3129; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: 3130; VLX: # %bb.0: # %entry 3131; VLX-NEXT: kmovd %edi, %k1 3132; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3133; VLX-NEXT: kmovd %k0, %eax 3134; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3135; VLX-NEXT: retq 3136; 3137; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: 3138; NoVLX: # %bb.0: # %entry 3139; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3140; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3141; NoVLX-NEXT: kmovw %edi, %k1 3142; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3143; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3144; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3145; NoVLX-NEXT: kmovw %k0, %eax 3146; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3147; NoVLX-NEXT: vzeroupper 3148; NoVLX-NEXT: retq 3149entry: 3150 %0 = bitcast <2 x i64> %__a to <2 x i64> 3151 %1 = bitcast <2 x i64> %__b to <2 x i64> 3152 %2 = icmp eq <2 x i64> %0, %1 3153 %3 = bitcast i8 %__u to <8 x i1> 3154 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3155 %4 = and <2 x i1> %2, %extract.i 3156 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3> 3157 %6 = bitcast <16 x i1> %5 to i16 3158 ret i16 %6 3159} 3160 3161define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3162; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: 3163; VLX: # %bb.0: # %entry 3164; VLX-NEXT: kmovd %edi, %k1 3165; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 3166; VLX-NEXT: kmovd %k0, %eax 3167; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3168; VLX-NEXT: retq 3169; 3170; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: 3171; NoVLX: # %bb.0: # %entry 3172; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3173; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3174; NoVLX-NEXT: kmovw %edi, %k1 3175; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3176; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3177; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3178; NoVLX-NEXT: kmovw %k0, %eax 3179; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3180; NoVLX-NEXT: vzeroupper 3181; NoVLX-NEXT: retq 3182entry: 3183 %0 = bitcast <2 x i64> %__a to <2 x i64> 3184 %load = load <2 x i64>, <2 x i64>* %__b 3185 %1 = bitcast <2 x i64> %load to <2 x i64> 3186 %2 = icmp eq <2 x i64> %0, %1 3187 %3 = bitcast i8 %__u to <8 x i1> 3188 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3189 %4 = and <2 x i1> %2, %extract.i 3190 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3191 %6 = bitcast <16 x i1> %5 to i16 3192 ret i16 %6 3193} 3194 3195 3196define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 3197; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3198; VLX: # %bb.0: # %entry 3199; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3200; VLX-NEXT: kmovd %k0, %eax 3201; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3202; VLX-NEXT: retq 3203; 3204; NoVLX-LABEL: 
test_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3205; NoVLX: # %bb.0: # %entry 3206; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3207; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 3208; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3209; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3210; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3211; NoVLX-NEXT: kmovw %k0, %eax 3212; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3213; NoVLX-NEXT: vzeroupper 3214; NoVLX-NEXT: retq 3215entry: 3216 %0 = bitcast <2 x i64> %__a to <2 x i64> 3217 %load = load i64, i64* %__b 3218 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3219 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3220 %2 = icmp eq <2 x i64> %0, %1 3221 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3222 %4 = bitcast <16 x i1> %3 to i16 3223 ret i16 %4 3224} 3225 3226define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 3227; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3228; VLX: # %bb.0: # %entry 3229; VLX-NEXT: kmovd %edi, %k1 3230; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3231; VLX-NEXT: kmovd %k0, %eax 3232; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3233; VLX-NEXT: retq 3234; 3235; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3236; NoVLX: # %bb.0: # %entry 3237; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3238; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 3239; NoVLX-NEXT: kmovw %edi, %k1 3240; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3241; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3242; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3243; NoVLX-NEXT: kmovw %k0, %eax 3244; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3245; NoVLX-NEXT: vzeroupper 3246; NoVLX-NEXT: retq 3247entry: 3248 %0 = bitcast <2 x i64> %__a to <2 x i64> 3249 %load = load i64, i64* %__b 3250 %vec = 
insertelement <2 x i64> undef, i64 %load, i32 0 3251 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3252 %2 = icmp eq <2 x i64> %0, %1 3253 %3 = bitcast i8 %__u to <8 x i1> 3254 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3255 %4 = and <2 x i1> %extract.i, %2 3256 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3257 %6 = bitcast <16 x i1> %5 to i16 3258 ret i16 %6 3259} 3260 3261 3262define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3263; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: 3264; VLX: # %bb.0: # %entry 3265; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3266; VLX-NEXT: kmovd %k0, %eax 3267; VLX-NEXT: retq 3268; 3269; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: 3270; NoVLX: # %bb.0: # %entry 3271; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3272; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3273; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3274; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3275; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3276; NoVLX-NEXT: kmovw %k0, %eax 3277; NoVLX-NEXT: vzeroupper 3278; NoVLX-NEXT: retq 3279entry: 3280 %0 = bitcast <2 x i64> %__a to <2 x i64> 3281 %1 = bitcast <2 x i64> %__b to <2 x i64> 3282 %2 = icmp eq <2 x i64> %0, %1 3283 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3284 %4 = bitcast <32 x i1> %3 to i32 3285 ret i32 %4 3286} 3287 3288define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3289; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: 3290; VLX: # %bb.0: # %entry 3291; VLX-NEXT: 
vpcmpeqq (%rdi), %xmm0, %k0 3292; VLX-NEXT: kmovd %k0, %eax 3293; VLX-NEXT: retq 3294; 3295; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: 3296; NoVLX: # %bb.0: # %entry 3297; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3298; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3299; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3300; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3301; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3302; NoVLX-NEXT: kmovw %k0, %eax 3303; NoVLX-NEXT: vzeroupper 3304; NoVLX-NEXT: retq 3305entry: 3306 %0 = bitcast <2 x i64> %__a to <2 x i64> 3307 %load = load <2 x i64>, <2 x i64>* %__b 3308 %1 = bitcast <2 x i64> %load to <2 x i64> 3309 %2 = icmp eq <2 x i64> %0, %1 3310 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3311 %4 = bitcast <32 x i1> %3 to i32 3312 ret i32 %4 3313} 3314 3315define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3316; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: 3317; VLX: # %bb.0: # %entry 3318; VLX-NEXT: kmovd %edi, %k1 3319; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3320; VLX-NEXT: kmovd %k0, %eax 3321; VLX-NEXT: retq 3322; 3323; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: 3324; NoVLX: # %bb.0: # %entry 3325; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3326; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3327; NoVLX-NEXT: kmovw %edi, %k1 3328; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3329; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3330; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3331; NoVLX-NEXT: kmovw %k0, %eax 3332; NoVLX-NEXT: vzeroupper 3333; NoVLX-NEXT: retq 3334entry: 3335 %0 = bitcast <2 x i64> %__a to <2 x i64> 3336 %1 = bitcast <2 x i64> %__b to <2 x i64> 3337 %2 = icmp eq <2 x i64> %0, %1 3338 %3 = 
bitcast i8 %__u to <8 x i1> 3339 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3340 %4 = and <2 x i1> %2, %extract.i 3341 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3342 %6 = bitcast <32 x i1> %5 to i32 3343 ret i32 %6 3344} 3345 3346define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3347; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: 3348; VLX: # %bb.0: # %entry 3349; VLX-NEXT: kmovd %edi, %k1 3350; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 3351; VLX-NEXT: kmovd %k0, %eax 3352; VLX-NEXT: retq 3353; 3354; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: 3355; NoVLX: # %bb.0: # %entry 3356; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3357; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3358; NoVLX-NEXT: kmovw %edi, %k1 3359; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3360; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3361; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3362; NoVLX-NEXT: kmovw %k0, %eax 3363; NoVLX-NEXT: vzeroupper 3364; NoVLX-NEXT: retq 3365entry: 3366 %0 = bitcast <2 x i64> %__a to <2 x i64> 3367 %load = load <2 x i64>, <2 x i64>* %__b 3368 %1 = bitcast <2 x i64> %load to <2 x i64> 3369 %2 = icmp eq <2 x i64> %0, %1 3370 %3 = bitcast i8 %__u to <8 x i1> 3371 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3372 %4 = and <2 x i1> %2, %extract.i 3373 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3374 %6 = bitcast <32 x i1> 
%5 to i32 3375 ret i32 %6 3376} 3377 3378 3379define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 3380; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3381; VLX: # %bb.0: # %entry 3382; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3383; VLX-NEXT: kmovd %k0, %eax 3384; VLX-NEXT: retq 3385; 3386; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3387; NoVLX: # %bb.0: # %entry 3388; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3389; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 3390; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3391; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3392; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3393; NoVLX-NEXT: kmovw %k0, %eax 3394; NoVLX-NEXT: vzeroupper 3395; NoVLX-NEXT: retq 3396entry: 3397 %0 = bitcast <2 x i64> %__a to <2 x i64> 3398 %load = load i64, i64* %__b 3399 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3400 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3401 %2 = icmp eq <2 x i64> %0, %1 3402 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3403 %4 = bitcast <32 x i1> %3 to i32 3404 ret i32 %4 3405} 3406 3407define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 3408; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3409; VLX: # %bb.0: # %entry 3410; VLX-NEXT: kmovd %edi, %k1 3411; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3412; VLX-NEXT: kmovd %k0, %eax 3413; VLX-NEXT: retq 3414; 3415; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3416; NoVLX: # %bb.0: # %entry 3417; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3418; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 3419; NoVLX-NEXT: kmovw %edi, %k1 3420; NoVLX-NEXT: 
vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3421; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3422; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3423; NoVLX-NEXT: kmovw %k0, %eax 3424; NoVLX-NEXT: vzeroupper 3425; NoVLX-NEXT: retq 3426entry: 3427 %0 = bitcast <2 x i64> %__a to <2 x i64> 3428 %load = load i64, i64* %__b 3429 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3430 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3431 %2 = icmp eq <2 x i64> %0, %1 3432 %3 = bitcast i8 %__u to <8 x i1> 3433 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3434 %4 = and <2 x i1> %extract.i, %2 3435 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3436 %6 = bitcast <32 x i1> %5 to i32 3437 ret i32 %6 3438} 3439 3440 3441define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3442; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: 3443; VLX: # %bb.0: # %entry 3444; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3445; VLX-NEXT: kmovq %k0, %rax 3446; VLX-NEXT: retq 3447; 3448; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: 3449; NoVLX: # %bb.0: # %entry 3450; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3451; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3452; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3453; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3454; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3455; NoVLX-NEXT: kmovw %k0, %eax 3456; NoVLX-NEXT: movzwl %ax, %eax 3457; NoVLX-NEXT: vzeroupper 3458; NoVLX-NEXT: retq 3459entry: 3460 %0 = bitcast <2 x i64> %__a to <2 x i64> 3461 %1 = bitcast <2 x i64> %__b to <2 x i64> 3462 %2 = icmp eq <2 x i64> %0, %1 3463 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 
3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3464 %4 = bitcast <64 x i1> %3 to i64 3465 ret i64 %4 3466} 3467 3468define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3469; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: 3470; VLX: # %bb.0: # %entry 3471; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 3472; VLX-NEXT: kmovq %k0, %rax 3473; VLX-NEXT: retq 3474; 3475; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: 3476; NoVLX: # %bb.0: # %entry 3477; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3478; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3479; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3480; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3481; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3482; NoVLX-NEXT: kmovw %k0, %eax 3483; NoVLX-NEXT: movzwl %ax, %eax 3484; NoVLX-NEXT: vzeroupper 3485; NoVLX-NEXT: retq 3486entry: 3487 %0 = bitcast <2 x i64> %__a to <2 x i64> 3488 %load = load <2 x i64>, <2 x i64>* %__b 3489 %1 = bitcast <2 x i64> %load to <2 x i64> 3490 %2 = icmp eq <2 x i64> %0, %1 3491 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3492 %4 = bitcast <64 x i1> %3 to i64 3493 ret i64 %4 3494} 3495 3496define zeroext i64 
@test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3497; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: 3498; VLX: # %bb.0: # %entry 3499; VLX-NEXT: kmovd %edi, %k1 3500; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3501; VLX-NEXT: kmovq %k0, %rax 3502; VLX-NEXT: retq 3503; 3504; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: 3505; NoVLX: # %bb.0: # %entry 3506; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3507; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3508; NoVLX-NEXT: kmovw %edi, %k1 3509; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3510; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3511; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3512; NoVLX-NEXT: kmovw %k0, %eax 3513; NoVLX-NEXT: movzwl %ax, %eax 3514; NoVLX-NEXT: vzeroupper 3515; NoVLX-NEXT: retq 3516entry: 3517 %0 = bitcast <2 x i64> %__a to <2 x i64> 3518 %1 = bitcast <2 x i64> %__b to <2 x i64> 3519 %2 = icmp eq <2 x i64> %0, %1 3520 %3 = bitcast i8 %__u to <8 x i1> 3521 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3522 %4 = and <2 x i1> %2, %extract.i 3523 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3524 %6 = bitcast <64 x i1> %5 to i64 3525 ret i64 %6 3526} 3527 3528define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 3529; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: 3530; VLX: # %bb.0: # %entry 3531; VLX-NEXT: kmovd %edi, %k1 3532; VLX-NEXT: vpcmpeqq 
(%rsi), %xmm0, %k0 {%k1} 3533; VLX-NEXT: kmovq %k0, %rax 3534; VLX-NEXT: retq 3535; 3536; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: 3537; NoVLX: # %bb.0: # %entry 3538; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3539; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3540; NoVLX-NEXT: kmovw %edi, %k1 3541; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3542; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3543; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3544; NoVLX-NEXT: kmovw %k0, %eax 3545; NoVLX-NEXT: movzwl %ax, %eax 3546; NoVLX-NEXT: vzeroupper 3547; NoVLX-NEXT: retq 3548entry: 3549 %0 = bitcast <2 x i64> %__a to <2 x i64> 3550 %load = load <2 x i64>, <2 x i64>* %__b 3551 %1 = bitcast <2 x i64> %load to <2 x i64> 3552 %2 = icmp eq <2 x i64> %0, %1 3553 %3 = bitcast i8 %__u to <8 x i1> 3554 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3555 %4 = and <2 x i1> %2, %extract.i 3556 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3557 %6 = bitcast <64 x i1> %5 to i64 3558 ret i64 %6 3559} 3560 3561 3562define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 3563; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3564; VLX: # %bb.0: # %entry 3565; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3566; VLX-NEXT: kmovq %k0, %rax 3567; VLX-NEXT: retq 3568; 3569; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3570; NoVLX: # %bb.0: # %entry 3571; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3572; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 3573; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3574; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3575; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3576; NoVLX-NEXT: kmovw %k0, %eax 3577; NoVLX-NEXT: movzwl %ax, %eax 3578; NoVLX-NEXT: vzeroupper 3579; NoVLX-NEXT: retq 3580entry: 3581 %0 = bitcast <2 x i64> %__a to <2 x i64> 3582 %load = load i64, i64* %__b 3583 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3584 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3585 %2 = icmp eq <2 x i64> %0, %1 3586 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3587 %4 = bitcast <64 x i1> %3 to i64 3588 ret i64 %4 3589} 3590 3591define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 3592; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3593; VLX: # %bb.0: # %entry 3594; VLX-NEXT: kmovd %edi, %k1 3595; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3596; VLX-NEXT: kmovq %k0, %rax 3597; VLX-NEXT: retq 3598; 3599; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3600; NoVLX: # %bb.0: # %entry 3601; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3602; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 3603; NoVLX-NEXT: kmovw %edi, %k1 3604; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3605; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3606; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3607; NoVLX-NEXT: kmovw %k0, %eax 3608; NoVLX-NEXT: movzwl %ax, %eax 3609; NoVLX-NEXT: vzeroupper 3610; NoVLX-NEXT: retq 3611entry: 3612 %0 = bitcast <2 x i64> %__a to 
<2 x i64> 3613 %load = load i64, i64* %__b 3614 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3615 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3616 %2 = icmp eq <2 x i64> %0, %1 3617 %3 = bitcast i8 %__u to <8 x i1> 3618 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3619 %4 = and <2 x i1> %extract.i, %2 3620 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3621 %6 = bitcast <64 x i1> %5 to i64 3622 ret i64 %6 3623} 3624 3625 3626define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3627; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: 3628; VLX: # %bb.0: # %entry 3629; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 3630; VLX-NEXT: kmovd %k0, %eax 3631; VLX-NEXT: # kill: def $al killed $al killed $eax 3632; VLX-NEXT: vzeroupper 3633; VLX-NEXT: retq 3634; 3635; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: 3636; NoVLX: # %bb.0: # %entry 3637; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3638; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3639; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3640; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3641; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3642; NoVLX-NEXT: kmovw %k0, %eax 3643; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3644; NoVLX-NEXT: vzeroupper 3645; NoVLX-NEXT: retq 3646entry: 3647 %0 = bitcast <4 x i64> %__a to <4 x i64> 3648 %1 = bitcast <4 x i64> %__b to <4 x i64> 3649 %2 = icmp eq <4 x i64> %0, %1 3650 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3651 %4 = bitcast <8 x i1> %3 to i8 3652 ret i8 %4 3653} 3654 3655define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 3656; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: 3657; VLX: # %bb.0: # %entry 3658; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 3659; VLX-NEXT: kmovd %k0, %eax 3660; VLX-NEXT: # kill: def $al killed $al killed $eax 3661; VLX-NEXT: vzeroupper 3662; VLX-NEXT: retq 3663; 3664; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: 3665; NoVLX: # %bb.0: # %entry 3666; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3667; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 3668; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3669; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3670; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3671; NoVLX-NEXT: kmovw %k0, %eax 3672; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3673; NoVLX-NEXT: vzeroupper 3674; NoVLX-NEXT: retq 3675entry: 3676 %0 = bitcast <4 x i64> %__a to <4 x i64> 3677 %load = load <4 x i64>, <4 x i64>* %__b 3678 %1 = bitcast <4 x i64> %load to <4 x i64> 3679 %2 = icmp eq <4 x i64> %0, %1 3680 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3681 %4 = bitcast <8 x i1> %3 to i8 3682 ret i8 %4 3683} 3684 3685define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3686; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: 3687; VLX: # %bb.0: # %entry 3688; VLX-NEXT: kmovd %edi, %k1 3689; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 3690; VLX-NEXT: kmovd %k0, %eax 3691; VLX-NEXT: # kill: def $al killed $al killed $eax 3692; VLX-NEXT: vzeroupper 3693; VLX-NEXT: retq 3694; 3695; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: 3696; NoVLX: # %bb.0: # %entry 3697; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3698; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3699; 
NoVLX-NEXT: kmovw %edi, %k1 3700; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3701; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3702; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3703; NoVLX-NEXT: kmovw %k0, %eax 3704; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3705; NoVLX-NEXT: vzeroupper 3706; NoVLX-NEXT: retq 3707entry: 3708 %0 = bitcast <4 x i64> %__a to <4 x i64> 3709 %1 = bitcast <4 x i64> %__b to <4 x i64> 3710 %2 = icmp eq <4 x i64> %0, %1 3711 %3 = bitcast i8 %__u to <8 x i1> 3712 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3713 %4 = and <4 x i1> %2, %extract.i 3714 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3715 %6 = bitcast <8 x i1> %5 to i8 3716 ret i8 %6 3717} 3718 3719define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 3720; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: 3721; VLX: # %bb.0: # %entry 3722; VLX-NEXT: kmovd %edi, %k1 3723; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 3724; VLX-NEXT: kmovd %k0, %eax 3725; VLX-NEXT: # kill: def $al killed $al killed $eax 3726; VLX-NEXT: vzeroupper 3727; VLX-NEXT: retq 3728; 3729; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: 3730; NoVLX: # %bb.0: # %entry 3731; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3732; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 3733; NoVLX-NEXT: kmovw %edi, %k1 3734; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3735; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3736; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3737; NoVLX-NEXT: kmovw %k0, %eax 3738; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3739; NoVLX-NEXT: vzeroupper 3740; NoVLX-NEXT: retq 3741entry: 3742 %0 = bitcast <4 x i64> %__a to <4 x i64> 3743 %load = load <4 x i64>, <4 x i64>* %__b 3744 %1 = bitcast <4 x i64> %load to <4 x i64> 3745 %2 = icmp eq <4 x i64> %0, %1 3746 %3 = bitcast i8 %__u to <8 x i1> 3747 %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3748 %4 = and <4 x i1> %2, %extract.i 3749 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3750 %6 = bitcast <8 x i1> %5 to i8 3751 ret i8 %6 3752} 3753 3754 3755define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 3756; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3757; VLX: # %bb.0: # %entry 3758; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 3759; VLX-NEXT: kmovd %k0, %eax 3760; VLX-NEXT: # kill: def $al killed $al killed $eax 3761; VLX-NEXT: vzeroupper 3762; VLX-NEXT: retq 3763; 3764; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3765; NoVLX: # %bb.0: # %entry 3766; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3767; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 3768; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3769; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3770; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3771; NoVLX-NEXT: kmovw %k0, %eax 3772; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3773; NoVLX-NEXT: vzeroupper 3774; NoVLX-NEXT: retq 3775entry: 3776 %0 = bitcast <4 x i64> %__a to <4 x i64> 3777 %load = load i64, i64* %__b 3778 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3779 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3780 %2 = icmp eq <4 x i64> %0, %1 3781 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3782 %4 = bitcast <8 x i1> %3 to i8 3783 ret i8 %4 3784} 3785 3786define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 3787; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3788; VLX: # %bb.0: # %entry 3789; VLX-NEXT: kmovd %edi, %k1 3790; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 3791; VLX-NEXT: kmovd %k0, %eax 3792; VLX-NEXT: # kill: def $al 
killed $al killed $eax 3793; VLX-NEXT: vzeroupper 3794; VLX-NEXT: retq 3795; 3796; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3797; NoVLX: # %bb.0: # %entry 3798; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3799; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 3800; NoVLX-NEXT: kmovw %edi, %k1 3801; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3802; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3803; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3804; NoVLX-NEXT: kmovw %k0, %eax 3805; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3806; NoVLX-NEXT: vzeroupper 3807; NoVLX-NEXT: retq 3808entry: 3809 %0 = bitcast <4 x i64> %__a to <4 x i64> 3810 %load = load i64, i64* %__b 3811 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3812 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3813 %2 = icmp eq <4 x i64> %0, %1 3814 %3 = bitcast i8 %__u to <8 x i1> 3815 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3816 %4 = and <4 x i1> %extract.i, %2 3817 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3818 %6 = bitcast <8 x i1> %5 to i8 3819 ret i8 %6 3820} 3821 3822 3823define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3824; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: 3825; VLX: # %bb.0: # %entry 3826; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 3827; VLX-NEXT: kmovd %k0, %eax 3828; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3829; VLX-NEXT: vzeroupper 3830; VLX-NEXT: retq 3831; 3832; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: 3833; NoVLX: # %bb.0: # %entry 3834; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3835; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3836; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3837; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3838; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3839; NoVLX-NEXT: kmovw %k0, %eax 3840; NoVLX-NEXT: # kill: def $ax 
killed $ax killed $eax 3841; NoVLX-NEXT: vzeroupper 3842; NoVLX-NEXT: retq 3843entry: 3844 %0 = bitcast <4 x i64> %__a to <4 x i64> 3845 %1 = bitcast <4 x i64> %__b to <4 x i64> 3846 %2 = icmp eq <4 x i64> %0, %1 3847 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3848 %4 = bitcast <16 x i1> %3 to i16 3849 ret i16 %4 3850} 3851 3852define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 3853; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: 3854; VLX: # %bb.0: # %entry 3855; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 3856; VLX-NEXT: kmovd %k0, %eax 3857; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3858; VLX-NEXT: vzeroupper 3859; VLX-NEXT: retq 3860; 3861; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: 3862; NoVLX: # %bb.0: # %entry 3863; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3864; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 3865; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3866; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3867; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3868; NoVLX-NEXT: kmovw %k0, %eax 3869; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3870; NoVLX-NEXT: vzeroupper 3871; NoVLX-NEXT: retq 3872entry: 3873 %0 = bitcast <4 x i64> %__a to <4 x i64> 3874 %load = load <4 x i64>, <4 x i64>* %__b 3875 %1 = bitcast <4 x i64> %load to <4 x i64> 3876 %2 = icmp eq <4 x i64> %0, %1 3877 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3878 %4 = bitcast <16 x i1> %3 to i16 3879 ret i16 %4 3880} 3881 3882define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3883; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: 3884; VLX: # %bb.0: # %entry 3885; VLX-NEXT: kmovd %edi, %k1 3886; VLX-NEXT: 
vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 3887; VLX-NEXT: kmovd %k0, %eax 3888; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3889; VLX-NEXT: vzeroupper 3890; VLX-NEXT: retq 3891; 3892; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: 3893; NoVLX: # %bb.0: # %entry 3894; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3895; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3896; NoVLX-NEXT: kmovw %edi, %k1 3897; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3898; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3899; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3900; NoVLX-NEXT: kmovw %k0, %eax 3901; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3902; NoVLX-NEXT: vzeroupper 3903; NoVLX-NEXT: retq 3904entry: 3905 %0 = bitcast <4 x i64> %__a to <4 x i64> 3906 %1 = bitcast <4 x i64> %__b to <4 x i64> 3907 %2 = icmp eq <4 x i64> %0, %1 3908 %3 = bitcast i8 %__u to <8 x i1> 3909 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3910 %4 = and <4 x i1> %2, %extract.i 3911 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3912 %6 = bitcast <16 x i1> %5 to i16 3913 ret i16 %6 3914} 3915 3916define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 3917; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: 3918; VLX: # %bb.0: # %entry 3919; VLX-NEXT: kmovd %edi, %k1 3920; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 3921; VLX-NEXT: kmovd %k0, %eax 3922; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3923; VLX-NEXT: vzeroupper 3924; VLX-NEXT: retq 3925; 3926; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: 3927; NoVLX: # %bb.0: # %entry 3928; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3929; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 3930; NoVLX-NEXT: kmovw %edi, %k1 3931; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3932; NoVLX-NEXT: 
kshiftlw $12, %k0, %k0 3933; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3934; NoVLX-NEXT: kmovw %k0, %eax 3935; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3936; NoVLX-NEXT: vzeroupper 3937; NoVLX-NEXT: retq 3938entry: 3939 %0 = bitcast <4 x i64> %__a to <4 x i64> 3940 %load = load <4 x i64>, <4 x i64>* %__b 3941 %1 = bitcast <4 x i64> %load to <4 x i64> 3942 %2 = icmp eq <4 x i64> %0, %1 3943 %3 = bitcast i8 %__u to <8 x i1> 3944 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3945 %4 = and <4 x i1> %2, %extract.i 3946 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3947 %6 = bitcast <16 x i1> %5 to i16 3948 ret i16 %6 3949} 3950 3951 3952define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 3953; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3954; VLX: # %bb.0: # %entry 3955; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 3956; VLX-NEXT: kmovd %k0, %eax 3957; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3958; VLX-NEXT: vzeroupper 3959; VLX-NEXT: retq 3960; 3961; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3962; NoVLX: # %bb.0: # %entry 3963; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3964; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 3965; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3966; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3967; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3968; NoVLX-NEXT: kmovw %k0, %eax 3969; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3970; NoVLX-NEXT: vzeroupper 3971; NoVLX-NEXT: retq 3972entry: 3973 %0 = bitcast <4 x i64> %__a to <4 x i64> 3974 %load = load i64, i64* %__b 3975 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3976 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3977 %2 = icmp eq <4 x i64> %0, %1 3978 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3979 %4 = bitcast <16 x i1> %3 to i16 3980 ret i16 %4 3981} 3982 3983define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 3984; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3985; VLX: # %bb.0: # %entry 3986; VLX-NEXT: kmovd %edi, %k1 3987; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 3988; VLX-NEXT: kmovd %k0, %eax 3989; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3990; VLX-NEXT: vzeroupper 3991; VLX-NEXT: retq 3992; 3993; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3994; NoVLX: # %bb.0: # %entry 3995; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3996; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 3997; NoVLX-NEXT: kmovw %edi, %k1 3998; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3999; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4000; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4001; NoVLX-NEXT: kmovw %k0, %eax 4002; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4003; NoVLX-NEXT: vzeroupper 4004; NoVLX-NEXT: retq 4005entry: 4006 %0 = bitcast <4 x i64> %__a to <4 x i64> 4007 %load = load i64, i64* %__b 4008 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 4009 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 4010 %2 = icmp eq <4 x i64> %0, %1 4011 %3 = bitcast i8 %__u to <8 x i1> 4012 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4013 %4 = and <4 x i1> %extract.i, %2 4014 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4015 %6 = bitcast <16 x i1> %5 to i16 4016 ret i16 %6 4017} 4018 4019 4020define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 4021; 
VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: 4022; VLX: # %bb.0: # %entry 4023; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 4024; VLX-NEXT: kmovd %k0, %eax 4025; VLX-NEXT: vzeroupper 4026; VLX-NEXT: retq 4027; 4028; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: 4029; NoVLX: # %bb.0: # %entry 4030; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 4031; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4032; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4033; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4034; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4035; NoVLX-NEXT: kmovw %k0, %eax 4036; NoVLX-NEXT: vzeroupper 4037; NoVLX-NEXT: retq 4038entry: 4039 %0 = bitcast <4 x i64> %__a to <4 x i64> 4040 %1 = bitcast <4 x i64> %__b to <4 x i64> 4041 %2 = icmp eq <4 x i64> %0, %1 4042 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4043 %4 = bitcast <32 x i1> %3 to i32 4044 ret i32 %4 4045} 4046 4047define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 4048; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: 4049; VLX: # %bb.0: # %entry 4050; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 4051; VLX-NEXT: kmovd %k0, %eax 4052; VLX-NEXT: vzeroupper 4053; VLX-NEXT: retq 4054; 4055; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: 4056; NoVLX: # %bb.0: # %entry 4057; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4058; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 4059; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4060; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4061; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4062; NoVLX-NEXT: kmovw %k0, %eax 4063; NoVLX-NEXT: vzeroupper 4064; NoVLX-NEXT: retq 4065entry: 4066 %0 = bitcast <4 x i64> %__a to <4 x i64> 4067 %load = load <4 x i64>, <4 x i64>* %__b 4068 %1 = bitcast <4 x i64> %load to <4 x i64> 4069 %2 = icmp 
eq <4 x i64> %0, %1 4070 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4071 %4 = bitcast <32 x i1> %3 to i32 4072 ret i32 %4 4073} 4074 4075define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 4076; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: 4077; VLX: # %bb.0: # %entry 4078; VLX-NEXT: kmovd %edi, %k1 4079; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 4080; VLX-NEXT: kmovd %k0, %eax 4081; VLX-NEXT: vzeroupper 4082; VLX-NEXT: retq 4083; 4084; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: 4085; NoVLX: # %bb.0: # %entry 4086; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 4087; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4088; NoVLX-NEXT: kmovw %edi, %k1 4089; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4090; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4091; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4092; NoVLX-NEXT: kmovw %k0, %eax 4093; NoVLX-NEXT: vzeroupper 4094; NoVLX-NEXT: retq 4095entry: 4096 %0 = bitcast <4 x i64> %__a to <4 x i64> 4097 %1 = bitcast <4 x i64> %__b to <4 x i64> 4098 %2 = icmp eq <4 x i64> %0, %1 4099 %3 = bitcast i8 %__u to <8 x i1> 4100 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4101 %4 = and <4 x i1> %2, %extract.i 4102 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4103 %6 = bitcast <32 x i1> %5 to i32 4104 ret i32 %6 4105} 4106 4107define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> 
%__a, <4 x i64>* %__b) local_unnamed_addr { 4108; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: 4109; VLX: # %bb.0: # %entry 4110; VLX-NEXT: kmovd %edi, %k1 4111; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 4112; VLX-NEXT: kmovd %k0, %eax 4113; VLX-NEXT: vzeroupper 4114; VLX-NEXT: retq 4115; 4116; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: 4117; NoVLX: # %bb.0: # %entry 4118; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4119; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 4120; NoVLX-NEXT: kmovw %edi, %k1 4121; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4122; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4123; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4124; NoVLX-NEXT: kmovw %k0, %eax 4125; NoVLX-NEXT: vzeroupper 4126; NoVLX-NEXT: retq 4127entry: 4128 %0 = bitcast <4 x i64> %__a to <4 x i64> 4129 %load = load <4 x i64>, <4 x i64>* %__b 4130 %1 = bitcast <4 x i64> %load to <4 x i64> 4131 %2 = icmp eq <4 x i64> %0, %1 4132 %3 = bitcast i8 %__u to <8 x i1> 4133 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4134 %4 = and <4 x i1> %2, %extract.i 4135 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4136 %6 = bitcast <32 x i1> %5 to i32 4137 ret i32 %6 4138} 4139 4140 4141define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 4142; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4143; VLX: # %bb.0: # %entry 4144; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 4145; VLX-NEXT: kmovd %k0, %eax 4146; VLX-NEXT: vzeroupper 4147; VLX-NEXT: retq 4148; 4149; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4150; NoVLX: # %bb.0: # %entry 4151; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4152; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 4153; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4154; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4155; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4156; NoVLX-NEXT: kmovw %k0, %eax 4157; NoVLX-NEXT: vzeroupper 4158; NoVLX-NEXT: retq 4159entry: 4160 %0 = bitcast <4 x i64> %__a to <4 x i64> 4161 %load = load i64, i64* %__b 4162 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 4163 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 4164 %2 = icmp eq <4 x i64> %0, %1 4165 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4166 %4 = bitcast <32 x i1> %3 to i32 4167 ret i32 %4 4168} 4169 4170define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 4171; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4172; VLX: # %bb.0: # %entry 4173; VLX-NEXT: kmovd %edi, %k1 4174; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 4175; VLX-NEXT: kmovd %k0, %eax 4176; VLX-NEXT: vzeroupper 4177; VLX-NEXT: retq 4178; 4179; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4180; NoVLX: # %bb.0: # %entry 4181; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4182; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 4183; NoVLX-NEXT: kmovw %edi, %k1 4184; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4185; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4186; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4187; NoVLX-NEXT: kmovw %k0, %eax 4188; NoVLX-NEXT: vzeroupper 4189; NoVLX-NEXT: retq 4190entry: 4191 %0 = bitcast <4 x i64> %__a to <4 x i64> 4192 %load = load i64, i64* %__b 4193 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 4194 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 4195 %2 = icmp eq <4 x i64> %0, %1 4196 %3 = bitcast 
i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; vpcmpeqq on <4 x i64>, result widened to i64.
; The <4 x i1> compare result is shuffled against zeroinitializer; mask
; indices 4-7 select the zero operand, so lanes 4..63 of the widened vector
; are zero before it is bitcast to the i64 return value.

; Both operands in registers.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %rhs = bitcast <4 x i64> %__b to <4 x i64>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Second operand loaded as a full vector from memory.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %rhs.mem = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %rhs.mem to <4 x i64>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Register operands; the low 4 bits of %__u are ANDed into the compare result.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %rhs = bitcast <4 x i64> %__b to <4 x i64>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %kmask.lo = shufflevector <8 x i1> %kmask, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sel = and <4 x i1> %cmp, %kmask.lo
  %wide = shufflevector <4 x i1> %sel, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked compare with the second operand loaded as a full vector from memory.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %rhs.mem = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %rhs.mem to <4 x i64>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %kmask.lo = shufflevector <8 x i1> %kmask, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sel = and <4 x i1> %cmp, %kmask.lo
  %wide = shufflevector <4 x i1> %sel, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Second operand is one i64 loaded from memory and splatted to all 4 lanes.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %scalar = load i64, i64* %__b
  %seed = insertelement <4 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <4 x i64> %seed, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %lhs, %splat
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4389 %4 = and <4 x i1> %extract.i, %2 4390 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4391 %6 = bitcast <64 x i1> %5 to i64 4392 ret i64 %6 4393} 4394 4395 4396define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 4397; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask: 4398; VLX: # %bb.0: # %entry 4399; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4400; VLX-NEXT: kmovd %k0, %eax 4401; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4402; VLX-NEXT: vzeroupper 4403; VLX-NEXT: retq 4404; 4405; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask: 4406; NoVLX: # %bb.0: # %entry 4407; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4408; NoVLX-NEXT: kmovw %k0, %eax 4409; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4410; NoVLX-NEXT: vzeroupper 4411; NoVLX-NEXT: retq 4412entry: 4413 %0 = bitcast <8 x i64> %__a to <8 x i64> 4414 %1 = bitcast <8 x i64> %__b to <8 x i64> 4415 %2 = icmp eq <8 x i64> %0, %1 4416 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4417 %4 = bitcast <16 x i1> %3 to i16 4418 ret i16 %4 4419} 4420 4421define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 4422; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem: 4423; VLX: # %bb.0: # %entry 4424; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 
4425; VLX-NEXT: kmovd %k0, %eax 4426; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4427; VLX-NEXT: vzeroupper 4428; VLX-NEXT: retq 4429; 4430; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem: 4431; NoVLX: # %bb.0: # %entry 4432; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 4433; NoVLX-NEXT: kmovw %k0, %eax 4434; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4435; NoVLX-NEXT: vzeroupper 4436; NoVLX-NEXT: retq 4437entry: 4438 %0 = bitcast <8 x i64> %__a to <8 x i64> 4439 %load = load <8 x i64>, <8 x i64>* %__b 4440 %1 = bitcast <8 x i64> %load to <8 x i64> 4441 %2 = icmp eq <8 x i64> %0, %1 4442 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4443 %4 = bitcast <16 x i1> %3 to i16 4444 ret i16 %4 4445} 4446 4447define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 4448; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask: 4449; VLX: # %bb.0: # %entry 4450; VLX-NEXT: kmovd %edi, %k1 4451; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4452; VLX-NEXT: kmovd %k0, %eax 4453; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4454; VLX-NEXT: vzeroupper 4455; VLX-NEXT: retq 4456; 4457; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask: 4458; NoVLX: # %bb.0: # %entry 4459; NoVLX-NEXT: kmovw %edi, %k1 4460; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4461; NoVLX-NEXT: kmovw %k0, %eax 4462; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4463; NoVLX-NEXT: vzeroupper 4464; NoVLX-NEXT: retq 4465entry: 4466 %0 = bitcast <8 x i64> %__a to <8 x i64> 4467 %1 = bitcast <8 x i64> %__b to <8 x i64> 4468 %2 = icmp eq <8 x i64> %0, %1 4469 %3 = bitcast i8 %__u to <8 x i1> 4470 %4 = and <8 x i1> %2, %3 4471 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 
14, i32 15> 4472 %6 = bitcast <16 x i1> %5 to i16 4473 ret i16 %6 4474} 4475 4476define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 4477; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem: 4478; VLX: # %bb.0: # %entry 4479; VLX-NEXT: kmovd %edi, %k1 4480; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} 4481; VLX-NEXT: kmovd %k0, %eax 4482; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4483; VLX-NEXT: vzeroupper 4484; VLX-NEXT: retq 4485; 4486; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem: 4487; NoVLX: # %bb.0: # %entry 4488; NoVLX-NEXT: kmovw %edi, %k1 4489; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} 4490; NoVLX-NEXT: kmovw %k0, %eax 4491; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4492; NoVLX-NEXT: vzeroupper 4493; NoVLX-NEXT: retq 4494entry: 4495 %0 = bitcast <8 x i64> %__a to <8 x i64> 4496 %load = load <8 x i64>, <8 x i64>* %__b 4497 %1 = bitcast <8 x i64> %load to <8 x i64> 4498 %2 = icmp eq <8 x i64> %0, %1 4499 %3 = bitcast i8 %__u to <8 x i1> 4500 %4 = and <8 x i1> %2, %3 4501 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4502 %6 = bitcast <16 x i1> %5 to i16 4503 ret i16 %6 4504} 4505 4506 4507define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 4508; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b: 4509; VLX: # %bb.0: # %entry 4510; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 4511; VLX-NEXT: kmovd %k0, %eax 4512; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4513; VLX-NEXT: vzeroupper 4514; VLX-NEXT: retq 4515; 4516; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b: 4517; NoVLX: # %bb.0: # %entry 4518; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 4519; NoVLX-NEXT: kmovw %k0, %eax 4520; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4521; NoVLX-NEXT: 
vzeroupper 4522; NoVLX-NEXT: retq 4523entry: 4524 %0 = bitcast <8 x i64> %__a to <8 x i64> 4525 %load = load i64, i64* %__b 4526 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 4527 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 4528 %2 = icmp eq <8 x i64> %0, %1 4529 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4530 %4 = bitcast <16 x i1> %3 to i16 4531 ret i16 %4 4532} 4533 4534define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 4535; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b: 4536; VLX: # %bb.0: # %entry 4537; VLX-NEXT: kmovd %edi, %k1 4538; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 4539; VLX-NEXT: kmovd %k0, %eax 4540; VLX-NEXT: # kill: def $ax killed $ax killed $eax 4541; VLX-NEXT: vzeroupper 4542; VLX-NEXT: retq 4543; 4544; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b: 4545; NoVLX: # %bb.0: # %entry 4546; NoVLX-NEXT: kmovw %edi, %k1 4547; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 4548; NoVLX-NEXT: kmovw %k0, %eax 4549; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 4550; NoVLX-NEXT: vzeroupper 4551; NoVLX-NEXT: retq 4552entry: 4553 %0 = bitcast <8 x i64> %__a to <8 x i64> 4554 %load = load i64, i64* %__b 4555 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 4556 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 4557 %2 = icmp eq <8 x i64> %0, %1 4558 %3 = bitcast i8 %__u to <8 x i1> 4559 %4 = and <8 x i1> %3, %2 4560 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4561 %6 = bitcast <16 x i1> %5 to i16 4562 ret 
i16 %6
}


; vpcmpeqq on <8 x i64>, result widened to i32.
; The <8 x i1> compare result is shuffled against zeroinitializer; mask
; indices 8-15 select the zero operand, so lanes 8..31 of the widened vector
; are zero before it is bitcast to the i32 return value.

; Both operands in registers.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Second operand loaded as a full vector from memory.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Register operands; the 8 bits of %__u are ANDed into the compare result.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %kmask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked compare with the second operand loaded as a full vector from memory.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %kmask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


; Second operand is one i64 loaded from memory and splatted to all 8 lanes.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, i64* %__b
  %seed = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %seed, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %splat
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked compare against a splatted scalar loaded from memory.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, i64* %__b
  %seed = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %seed, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %splat
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %kmask, %cmp
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast
<8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; vpcmpeqq on <8 x i64>, result widened to i64: the <8 x i1> compare result is
; shuffled against zeroinitializer (mask indices 8-15 select the zero operand),
; so lanes 8..63 are zero before the bitcast to i64.
; This test loads the second operand as a full vector from memory.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Register operands; the 8 bits of %__u are ANDed into the compare result
; before it is widened to 64 lanes.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %kmask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked compare with the second operand loaded as a full vector from memory.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %kmask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64>
%vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp eq <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked vpcmpeqq of <8 x i64> against one i64 loaded from memory and splatted
; to all 8 lanes. The 8 bits of %__u are ANDed into the compare result, which
; is then zero-extended to 64 lanes (mask indices 8-15 select the
; zeroinitializer operand) and returned as i64.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, i64* %__b
  %seed = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <8 x i64> %seed, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %splat
  %kmask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %kmask, %cmp
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Signed greater-than on <16 x i8>, result widened to i32.
; The <16 x i1> compare result is shuffled against zeroinitializer; mask
; indices 16-31 select the zero operand, so lanes 16..31 of the widened vector
; are zero before it is bitcast to the i32 return value.

; Both operands in registers.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs = bitcast <2 x i64> %__b to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Second operand loaded as <2 x i64> from memory and reinterpreted as <16 x i8>.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs.mem = load <2 x i64>, <2 x i64>* %__b
  %rhs = bitcast <2 x i64> %rhs.mem to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp sgt <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32
30, i32 31> 4961 %6 = bitcast <32 x i1> %5 to i32 4962 ret i32 %6 4963} 4964 4965define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 4966; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: 4967; VLX: # %bb.0: # %entry 4968; VLX-NEXT: kmovd %edi, %k1 4969; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} 4970; VLX-NEXT: kmovd %k0, %eax 4971; VLX-NEXT: retq 4972; 4973; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: 4974; NoVLX: # %bb.0: # %entry 4975; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 4976; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 4977; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 4978; NoVLX-NEXT: kmovw %k0, %eax 4979; NoVLX-NEXT: andl %edi, %eax 4980; NoVLX-NEXT: vzeroupper 4981; NoVLX-NEXT: retq 4982entry: 4983 %0 = bitcast <2 x i64> %__a to <16 x i8> 4984 %load = load <2 x i64>, <2 x i64>* %__b 4985 %1 = bitcast <2 x i64> %load to <16 x i8> 4986 %2 = icmp sgt <16 x i8> %0, %1 4987 %3 = bitcast i16 %__u to <16 x i1> 4988 %4 = and <16 x i1> %2, %3 4989 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 4990 %6 = bitcast <32 x i1> %5 to i32 4991 ret i32 %6 4992} 4993 4994 4995define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 4996; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: 4997; VLX: # %bb.0: # %entry 4998; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 4999; VLX-NEXT: kmovq %k0, %rax 5000; VLX-NEXT: retq 5001; 5002; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: 5003; NoVLX: # %bb.0: # %entry 5004; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 5005; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5006; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5007; NoVLX-NEXT: kmovw %k0, %eax 5008; 
NoVLX-NEXT: movzwl %ax, %eax 5009; NoVLX-NEXT: vzeroupper 5010; NoVLX-NEXT: retq 5011entry: 5012 %0 = bitcast <2 x i64> %__a to <16 x i8> 5013 %1 = bitcast <2 x i64> %__b to <16 x i8> 5014 %2 = icmp sgt <16 x i8> %0, %1 5015 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5016 %4 = bitcast <64 x i1> %3 to i64 5017 ret i64 %4 5018} 5019 5020define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5021; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: 5022; VLX: # %bb.0: # %entry 5023; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 5024; VLX-NEXT: kmovq %k0, %rax 5025; VLX-NEXT: retq 5026; 5027; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: 5028; NoVLX: # %bb.0: # %entry 5029; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 5030; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5031; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5032; NoVLX-NEXT: kmovw %k0, %eax 5033; NoVLX-NEXT: movzwl %ax, %eax 5034; NoVLX-NEXT: vzeroupper 5035; NoVLX-NEXT: retq 5036entry: 5037 %0 = bitcast <2 x i64> %__a to <16 x i8> 5038 %load = load <2 x i64>, <2 x i64>* %__b 5039 %1 = bitcast <2 x i64> %load to <16 x i8> 5040 %2 = icmp sgt <16 x i8> %0, %1 5041 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, 
i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5042 %4 = bitcast <64 x i1> %3 to i64 5043 ret i64 %4 5044} 5045 5046define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5047; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: 5048; VLX: # %bb.0: # %entry 5049; VLX-NEXT: kmovd %edi, %k1 5050; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} 5051; VLX-NEXT: kmovq %k0, %rax 5052; VLX-NEXT: retq 5053; 5054; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: 5055; NoVLX: # %bb.0: # %entry 5056; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 5057; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5058; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5059; NoVLX-NEXT: kmovw %k0, %eax 5060; NoVLX-NEXT: andl %edi, %eax 5061; NoVLX-NEXT: vzeroupper 5062; NoVLX-NEXT: retq 5063entry: 5064 %0 = bitcast <2 x i64> %__a to <16 x i8> 5065 %1 = bitcast <2 x i64> %__b to <16 x i8> 5066 %2 = icmp sgt <16 x i8> %0, %1 5067 %3 = bitcast i16 %__u to <16 x i1> 5068 %4 = and <16 x i1> %2, %3 5069 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5070 %6 = bitcast <64 x i1> %5 to i64 5071 ret i64 %6 5072} 5073 5074define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x 
i64>* %__b) local_unnamed_addr { 5075; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: 5076; VLX: # %bb.0: # %entry 5077; VLX-NEXT: kmovd %edi, %k1 5078; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} 5079; VLX-NEXT: kmovq %k0, %rax 5080; VLX-NEXT: retq 5081; 5082; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: 5083; NoVLX: # %bb.0: # %entry 5084; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 5085; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5086; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5087; NoVLX-NEXT: kmovw %k0, %eax 5088; NoVLX-NEXT: andl %edi, %eax 5089; NoVLX-NEXT: vzeroupper 5090; NoVLX-NEXT: retq 5091entry: 5092 %0 = bitcast <2 x i64> %__a to <16 x i8> 5093 %load = load <2 x i64>, <2 x i64>* %__b 5094 %1 = bitcast <2 x i64> %load to <16 x i8> 5095 %2 = icmp sgt <16 x i8> %0, %1 5096 %3 = bitcast i16 %__u to <16 x i1> 5097 %4 = and <16 x i1> %2, %3 5098 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5099 %6 = bitcast <64 x i1> %5 to i64 5100 ret i64 %6 5101} 5102 5103 5104define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5105; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: 5106; VLX: # %bb.0: # %entry 5107; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 5108; VLX-NEXT: kmovq %k0, %rax 5109; VLX-NEXT: vzeroupper 5110; VLX-NEXT: retq 5111; 5112; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: 5113; NoVLX: # %bb.0: # %entry 5114; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 5115; NoVLX-NEXT: vpmovsxbd %xmm0, 
%zmm1 5116; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5117; NoVLX-NEXT: kmovw %k0, %ecx 5118; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5119; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5120; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5121; NoVLX-NEXT: kmovw %k0, %eax 5122; NoVLX-NEXT: shll $16, %eax 5123; NoVLX-NEXT: orl %ecx, %eax 5124; NoVLX-NEXT: vzeroupper 5125; NoVLX-NEXT: retq 5126entry: 5127 %0 = bitcast <4 x i64> %__a to <32 x i8> 5128 %1 = bitcast <4 x i64> %__b to <32 x i8> 5129 %2 = icmp sgt <32 x i8> %0, %1 5130 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5131 %4 = bitcast <64 x i1> %3 to i64 5132 ret i64 %4 5133} 5134 5135define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5136; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: 5137; VLX: # %bb.0: # %entry 5138; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 5139; VLX-NEXT: kmovq %k0, %rax 5140; VLX-NEXT: vzeroupper 5141; VLX-NEXT: retq 5142; 5143; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: 5144; NoVLX: # %bb.0: # %entry 5145; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 5146; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5147; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5148; NoVLX-NEXT: kmovw %k0, %ecx 5149; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5150; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5151; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5152; NoVLX-NEXT: kmovw %k0, %eax 5153; NoVLX-NEXT: shll $16, %eax 5154; NoVLX-NEXT: orl %ecx, %eax 5155; NoVLX-NEXT: vzeroupper 
5156; NoVLX-NEXT: retq 5157entry: 5158 %0 = bitcast <4 x i64> %__a to <32 x i8> 5159 %load = load <4 x i64>, <4 x i64>* %__b 5160 %1 = bitcast <4 x i64> %load to <32 x i8> 5161 %2 = icmp sgt <32 x i8> %0, %1 5162 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5163 %4 = bitcast <64 x i1> %3 to i64 5164 ret i64 %4 5165} 5166 5167define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5168; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: 5169; VLX: # %bb.0: # %entry 5170; VLX-NEXT: kmovd %edi, %k1 5171; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} 5172; VLX-NEXT: kmovq %k0, %rax 5173; VLX-NEXT: vzeroupper 5174; VLX-NEXT: retq 5175; 5176; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: 5177; NoVLX: # %bb.0: # %entry 5178; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 5179; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5180; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5181; NoVLX-NEXT: kmovw %k0, %eax 5182; NoVLX-NEXT: andl %edi, %eax 5183; NoVLX-NEXT: shrl $16, %edi 5184; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5185; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5186; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5187; NoVLX-NEXT: kmovw %k0, %ecx 5188; NoVLX-NEXT: andl %edi, %ecx 5189; NoVLX-NEXT: shll $16, %ecx 5190; NoVLX-NEXT: movzwl %ax, %eax 5191; NoVLX-NEXT: orl %ecx, %eax 5192; NoVLX-NEXT: vzeroupper 5193; NoVLX-NEXT: retq 5194entry: 5195 %0 = bitcast <4 x i64> %__a to <32 x i8> 5196 %1 = bitcast <4 x 
i64> %__b to <32 x i8> 5197 %2 = icmp sgt <32 x i8> %0, %1 5198 %3 = bitcast i32 %__u to <32 x i1> 5199 %4 = and <32 x i1> %2, %3 5200 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5201 %6 = bitcast <64 x i1> %5 to i64 5202 ret i64 %6 5203} 5204 5205define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5206; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: 5207; VLX: # %bb.0: # %entry 5208; VLX-NEXT: kmovd %edi, %k1 5209; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1} 5210; VLX-NEXT: kmovq %k0, %rax 5211; VLX-NEXT: vzeroupper 5212; VLX-NEXT: retq 5213; 5214; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: 5215; NoVLX: # %bb.0: # %entry 5216; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 5217; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5218; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5219; NoVLX-NEXT: kmovw %k0, %eax 5220; NoVLX-NEXT: andl %edi, %eax 5221; NoVLX-NEXT: shrl $16, %edi 5222; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5223; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5224; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5225; NoVLX-NEXT: kmovw %k0, %ecx 5226; NoVLX-NEXT: andl %edi, %ecx 5227; NoVLX-NEXT: shll $16, %ecx 5228; NoVLX-NEXT: movzwl %ax, %eax 5229; NoVLX-NEXT: orl %ecx, %eax 5230; NoVLX-NEXT: vzeroupper 5231; NoVLX-NEXT: retq 5232entry: 5233 %0 = bitcast <4 x i64> %__a to <32 x i8> 5234 %load = load <4 x i64>, <4 x i64>* %__b 5235 %1 = bitcast <4 x i64> %load to <32 x 
i8> 5236 %2 = icmp sgt <32 x i8> %0, %1 5237 %3 = bitcast i32 %__u to <32 x i1> 5238 %4 = and <32 x i1> %2, %3 5239 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5240 %6 = bitcast <64 x i1> %5 to i64 5241 ret i64 %6 5242} 5243 5244 5245define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5246; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: 5247; VLX: # %bb.0: # %entry 5248; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5249; VLX-NEXT: kmovd %k0, %eax 5250; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5251; VLX-NEXT: retq 5252; 5253; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: 5254; NoVLX: # %bb.0: # %entry 5255; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5256; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5257; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5258; NoVLX-NEXT: kmovw %k0, %eax 5259; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5260; NoVLX-NEXT: vzeroupper 5261; NoVLX-NEXT: retq 5262entry: 5263 %0 = bitcast <2 x i64> %__a to <8 x i16> 5264 %1 = bitcast <2 x i64> %__b to <8 x i16> 5265 %2 = icmp sgt <8 x i16> %0, %1 5266 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5267 %4 = bitcast <16 x i1> %3 to i16 5268 ret i16 %4 5269} 5270 5271define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5272; VLX-LABEL: 
test_vpcmpsgtw_v8i1_v16i1_mask_mem: 5273; VLX: # %bb.0: # %entry 5274; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5275; VLX-NEXT: kmovd %k0, %eax 5276; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5277; VLX-NEXT: retq 5278; 5279; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: 5280; NoVLX: # %bb.0: # %entry 5281; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5282; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5283; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5284; NoVLX-NEXT: kmovw %k0, %eax 5285; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5286; NoVLX-NEXT: vzeroupper 5287; NoVLX-NEXT: retq 5288entry: 5289 %0 = bitcast <2 x i64> %__a to <8 x i16> 5290 %load = load <2 x i64>, <2 x i64>* %__b 5291 %1 = bitcast <2 x i64> %load to <8 x i16> 5292 %2 = icmp sgt <8 x i16> %0, %1 5293 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5294 %4 = bitcast <16 x i1> %3 to i16 5295 ret i16 %4 5296} 5297 5298define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5299; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: 5300; VLX: # %bb.0: # %entry 5301; VLX-NEXT: kmovd %edi, %k1 5302; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5303; VLX-NEXT: kmovd %k0, %eax 5304; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5305; VLX-NEXT: retq 5306; 5307; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: 5308; NoVLX: # %bb.0: # %entry 5309; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5310; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5311; NoVLX-NEXT: kmovw %edi, %k1 5312; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5313; NoVLX-NEXT: kmovw %k0, %eax 5314; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5315; NoVLX-NEXT: vzeroupper 5316; NoVLX-NEXT: retq 5317entry: 5318 %0 = bitcast <2 x i64> %__a to <8 x i16> 5319 %1 = bitcast <2 x i64> %__b to <8 x i16> 5320 %2 = icmp sgt <8 x i16> %0, %1 5321 %3 = 
bitcast i8 %__u to <8 x i1> 5322 %4 = and <8 x i1> %2, %3 5323 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5324 %6 = bitcast <16 x i1> %5 to i16 5325 ret i16 %6 5326} 5327 5328define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5329; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: 5330; VLX: # %bb.0: # %entry 5331; VLX-NEXT: kmovd %edi, %k1 5332; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5333; VLX-NEXT: kmovd %k0, %eax 5334; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5335; VLX-NEXT: retq 5336; 5337; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: 5338; NoVLX: # %bb.0: # %entry 5339; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5340; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5341; NoVLX-NEXT: kmovw %edi, %k1 5342; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5343; NoVLX-NEXT: kmovw %k0, %eax 5344; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5345; NoVLX-NEXT: vzeroupper 5346; NoVLX-NEXT: retq 5347entry: 5348 %0 = bitcast <2 x i64> %__a to <8 x i16> 5349 %load = load <2 x i64>, <2 x i64>* %__b 5350 %1 = bitcast <2 x i64> %load to <8 x i16> 5351 %2 = icmp sgt <8 x i16> %0, %1 5352 %3 = bitcast i8 %__u to <8 x i1> 5353 %4 = and <8 x i1> %2, %3 5354 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5355 %6 = bitcast <16 x i1> %5 to i16 5356 ret i16 %6 5357} 5358 5359 5360define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5361; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: 5362; VLX: # %bb.0: # %entry 5363; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5364; VLX-NEXT: kmovd %k0, %eax 5365; VLX-NEXT: retq 5366; 5367; NoVLX-LABEL: 
test_vpcmpsgtw_v8i1_v32i1_mask: 5368; NoVLX: # %bb.0: # %entry 5369; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5370; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5371; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5372; NoVLX-NEXT: kmovw %k0, %eax 5373; NoVLX-NEXT: vzeroupper 5374; NoVLX-NEXT: retq 5375entry: 5376 %0 = bitcast <2 x i64> %__a to <8 x i16> 5377 %1 = bitcast <2 x i64> %__b to <8 x i16> 5378 %2 = icmp sgt <8 x i16> %0, %1 5379 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5380 %4 = bitcast <32 x i1> %3 to i32 5381 ret i32 %4 5382} 5383 5384define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5385; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: 5386; VLX: # %bb.0: # %entry 5387; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5388; VLX-NEXT: kmovd %k0, %eax 5389; VLX-NEXT: retq 5390; 5391; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: 5392; NoVLX: # %bb.0: # %entry 5393; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5394; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5395; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5396; NoVLX-NEXT: kmovw %k0, %eax 5397; NoVLX-NEXT: vzeroupper 5398; NoVLX-NEXT: retq 5399entry: 5400 %0 = bitcast <2 x i64> %__a to <8 x i16> 5401 %load = load <2 x i64>, <2 x i64>* %__b 5402 %1 = bitcast <2 x i64> %load to <8 x i16> 5403 %2 = icmp sgt <8 x i16> %0, %1 5404 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5405 %4 = bitcast <32 x i1> %3 to i32 5406 ret i32 %4 5407} 5408 5409define zeroext i32 
@test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5410; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: 5411; VLX: # %bb.0: # %entry 5412; VLX-NEXT: kmovd %edi, %k1 5413; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5414; VLX-NEXT: kmovd %k0, %eax 5415; VLX-NEXT: retq 5416; 5417; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: 5418; NoVLX: # %bb.0: # %entry 5419; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5420; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5421; NoVLX-NEXT: kmovw %edi, %k1 5422; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5423; NoVLX-NEXT: kmovw %k0, %eax 5424; NoVLX-NEXT: vzeroupper 5425; NoVLX-NEXT: retq 5426entry: 5427 %0 = bitcast <2 x i64> %__a to <8 x i16> 5428 %1 = bitcast <2 x i64> %__b to <8 x i16> 5429 %2 = icmp sgt <8 x i16> %0, %1 5430 %3 = bitcast i8 %__u to <8 x i1> 5431 %4 = and <8 x i1> %2, %3 5432 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5433 %6 = bitcast <32 x i1> %5 to i32 5434 ret i32 %6 5435} 5436 5437define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5438; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: 5439; VLX: # %bb.0: # %entry 5440; VLX-NEXT: kmovd %edi, %k1 5441; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5442; VLX-NEXT: kmovd %k0, %eax 5443; VLX-NEXT: retq 5444; 5445; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: 5446; NoVLX: # %bb.0: # %entry 5447; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5448; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5449; NoVLX-NEXT: kmovw %edi, %k1 5450; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5451; NoVLX-NEXT: kmovw %k0, %eax 5452; NoVLX-NEXT: vzeroupper 5453; NoVLX-NEXT: retq 
5454entry: 5455 %0 = bitcast <2 x i64> %__a to <8 x i16> 5456 %load = load <2 x i64>, <2 x i64>* %__b 5457 %1 = bitcast <2 x i64> %load to <8 x i16> 5458 %2 = icmp sgt <8 x i16> %0, %1 5459 %3 = bitcast i8 %__u to <8 x i1> 5460 %4 = and <8 x i1> %2, %3 5461 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5462 %6 = bitcast <32 x i1> %5 to i32 5463 ret i32 %6 5464} 5465 5466 5467define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5468; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: 5469; VLX: # %bb.0: # %entry 5470; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5471; VLX-NEXT: kmovq %k0, %rax 5472; VLX-NEXT: retq 5473; 5474; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: 5475; NoVLX: # %bb.0: # %entry 5476; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5477; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5478; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5479; NoVLX-NEXT: kmovw %k0, %eax 5480; NoVLX-NEXT: movzwl %ax, %eax 5481; NoVLX-NEXT: vzeroupper 5482; NoVLX-NEXT: retq 5483entry: 5484 %0 = bitcast <2 x i64> %__a to <8 x i16> 5485 %1 = bitcast <2 x i64> %__b to <8 x i16> 5486 %2 = icmp sgt <8 x i16> %0, %1 5487 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5488 %4 = bitcast <64 x i1> %3 
to i64 5489 ret i64 %4 5490} 5491 5492define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5493; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: 5494; VLX: # %bb.0: # %entry 5495; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5496; VLX-NEXT: kmovq %k0, %rax 5497; VLX-NEXT: retq 5498; 5499; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: 5500; NoVLX: # %bb.0: # %entry 5501; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5502; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5503; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5504; NoVLX-NEXT: kmovw %k0, %eax 5505; NoVLX-NEXT: movzwl %ax, %eax 5506; NoVLX-NEXT: vzeroupper 5507; NoVLX-NEXT: retq 5508entry: 5509 %0 = bitcast <2 x i64> %__a to <8 x i16> 5510 %load = load <2 x i64>, <2 x i64>* %__b 5511 %1 = bitcast <2 x i64> %load to <8 x i16> 5512 %2 = icmp sgt <8 x i16> %0, %1 5513 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5514 %4 = bitcast <64 x i1> %3 to i64 5515 ret i64 %4 5516} 5517 5518define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5519; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: 5520; VLX: # %bb.0: # %entry 5521; VLX-NEXT: kmovd %edi, %k1 5522; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5523; VLX-NEXT: kmovq %k0, %rax 5524; VLX-NEXT: retq 5525; 5526; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: 5527; NoVLX: # %bb.0: # %entry 5528; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5529; NoVLX-NEXT: 
vpmovsxwq %xmm0, %zmm0 5530; NoVLX-NEXT: kmovw %edi, %k1 5531; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5532; NoVLX-NEXT: kmovw %k0, %eax 5533; NoVLX-NEXT: movzwl %ax, %eax 5534; NoVLX-NEXT: vzeroupper 5535; NoVLX-NEXT: retq 5536entry: 5537 %0 = bitcast <2 x i64> %__a to <8 x i16> 5538 %1 = bitcast <2 x i64> %__b to <8 x i16> 5539 %2 = icmp sgt <8 x i16> %0, %1 5540 %3 = bitcast i8 %__u to <8 x i1> 5541 %4 = and <8 x i1> %2, %3 5542 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5543 %6 = bitcast <64 x i1> %5 to i64 5544 ret i64 %6 5545} 5546 5547define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5548; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: 5549; VLX: # %bb.0: # %entry 5550; VLX-NEXT: kmovd %edi, %k1 5551; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5552; VLX-NEXT: kmovq %k0, %rax 5553; VLX-NEXT: retq 5554; 5555; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: 5556; NoVLX: # %bb.0: # %entry 5557; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5558; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5559; NoVLX-NEXT: kmovw %edi, %k1 5560; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5561; NoVLX-NEXT: kmovw %k0, %eax 5562; NoVLX-NEXT: movzwl %ax, %eax 5563; NoVLX-NEXT: vzeroupper 5564; NoVLX-NEXT: retq 5565entry: 5566 %0 = bitcast <2 x i64> %__a to <8 x i16> 5567 %load = load <2 x i64>, <2 x i64>* %__b 5568 %1 = bitcast <2 x i64> %load to <8 x i16> 5569 %2 = icmp sgt <8 x i16> 
%0, %1 5570 %3 = bitcast i8 %__u to <8 x i1> 5571 %4 = and <8 x i1> %2, %3 5572 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5573 %6 = bitcast <64 x i1> %5 to i64 5574 ret i64 %6 5575} 5576 5577 5578define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5579; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: 5580; VLX: # %bb.0: # %entry 5581; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 5582; VLX-NEXT: kmovd %k0, %eax 5583; VLX-NEXT: vzeroupper 5584; VLX-NEXT: retq 5585; 5586; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: 5587; NoVLX: # %bb.0: # %entry 5588; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5589; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5590; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5591; NoVLX-NEXT: kmovw %k0, %eax 5592; NoVLX-NEXT: vzeroupper 5593; NoVLX-NEXT: retq 5594entry: 5595 %0 = bitcast <4 x i64> %__a to <16 x i16> 5596 %1 = bitcast <4 x i64> %__b to <16 x i16> 5597 %2 = icmp sgt <16 x i16> %0, %1 5598 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5599 %4 = bitcast <32 x i1> %3 to i32 5600 ret i32 %4 5601} 5602 5603define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5604; VLX-LABEL: 
test_vpcmpsgtw_v16i1_v32i1_mask_mem: 5605; VLX: # %bb.0: # %entry 5606; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 5607; VLX-NEXT: kmovd %k0, %eax 5608; VLX-NEXT: vzeroupper 5609; VLX-NEXT: retq 5610; 5611; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: 5612; NoVLX: # %bb.0: # %entry 5613; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 5614; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5615; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5616; NoVLX-NEXT: kmovw %k0, %eax 5617; NoVLX-NEXT: vzeroupper 5618; NoVLX-NEXT: retq 5619entry: 5620 %0 = bitcast <4 x i64> %__a to <16 x i16> 5621 %load = load <4 x i64>, <4 x i64>* %__b 5622 %1 = bitcast <4 x i64> %load to <16 x i16> 5623 %2 = icmp sgt <16 x i16> %0, %1 5624 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5625 %4 = bitcast <32 x i1> %3 to i32 5626 ret i32 %4 5627} 5628 5629define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5630; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: 5631; VLX: # %bb.0: # %entry 5632; VLX-NEXT: kmovd %edi, %k1 5633; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} 5634; VLX-NEXT: kmovd %k0, %eax 5635; VLX-NEXT: vzeroupper 5636; VLX-NEXT: retq 5637; 5638; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: 5639; NoVLX: # %bb.0: # %entry 5640; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5641; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5642; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5643; NoVLX-NEXT: kmovw %k0, %eax 5644; NoVLX-NEXT: andl %edi, %eax 5645; NoVLX-NEXT: vzeroupper 5646; NoVLX-NEXT: retq 5647entry: 5648 %0 = bitcast <4 x i64> %__a to <16 x i16> 5649 %1 = bitcast <4 x i64> %__b to <16 x i16> 5650 %2 = icmp sgt <16 x i16> %0, %1 5651 %3 = bitcast i16 %__u to <16 x i1> 5652 %4 
= and <16 x i1> %2, %3 5653 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5654 %6 = bitcast <32 x i1> %5 to i32 5655 ret i32 %6 5656} 5657 5658define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5659; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: 5660; VLX: # %bb.0: # %entry 5661; VLX-NEXT: kmovd %edi, %k1 5662; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} 5663; VLX-NEXT: kmovd %k0, %eax 5664; VLX-NEXT: vzeroupper 5665; VLX-NEXT: retq 5666; 5667; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: 5668; NoVLX: # %bb.0: # %entry 5669; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 5670; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5671; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5672; NoVLX-NEXT: kmovw %k0, %eax 5673; NoVLX-NEXT: andl %edi, %eax 5674; NoVLX-NEXT: vzeroupper 5675; NoVLX-NEXT: retq 5676entry: 5677 %0 = bitcast <4 x i64> %__a to <16 x i16> 5678 %load = load <4 x i64>, <4 x i64>* %__b 5679 %1 = bitcast <4 x i64> %load to <16 x i16> 5680 %2 = icmp sgt <16 x i16> %0, %1 5681 %3 = bitcast i16 %__u to <16 x i1> 5682 %4 = and <16 x i1> %2, %3 5683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5684 %6 = bitcast <32 x i1> %5 to i32 5685 ret i32 %6 5686} 5687 5688 5689define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5690; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: 5691; VLX: # %bb.0: # %entry 
5692; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 5693; VLX-NEXT: kmovq %k0, %rax 5694; VLX-NEXT: vzeroupper 5695; VLX-NEXT: retq 5696; 5697; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: 5698; NoVLX: # %bb.0: # %entry 5699; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5700; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5701; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5702; NoVLX-NEXT: kmovw %k0, %eax 5703; NoVLX-NEXT: movzwl %ax, %eax 5704; NoVLX-NEXT: vzeroupper 5705; NoVLX-NEXT: retq 5706entry: 5707 %0 = bitcast <4 x i64> %__a to <16 x i16> 5708 %1 = bitcast <4 x i64> %__b to <16 x i16> 5709 %2 = icmp sgt <16 x i16> %0, %1 5710 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5711 %4 = bitcast <64 x i1> %3 to i64 5712 ret i64 %4 5713} 5714 5715define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5716; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: 5717; VLX: # %bb.0: # %entry 5718; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 5719; VLX-NEXT: kmovq %k0, %rax 5720; VLX-NEXT: vzeroupper 5721; VLX-NEXT: retq 5722; 5723; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: 5724; NoVLX: # %bb.0: # %entry 5725; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 5726; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5727; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5728; NoVLX-NEXT: kmovw %k0, %eax 5729; NoVLX-NEXT: movzwl %ax, %eax 5730; NoVLX-NEXT: vzeroupper 5731; NoVLX-NEXT: retq 5732entry: 5733 %0 = bitcast <4 x i64> %__a to <16 x i16> 5734 %load = load <4 
x i64>, <4 x i64>* %__b 5735 %1 = bitcast <4 x i64> %load to <16 x i16> 5736 %2 = icmp sgt <16 x i16> %0, %1 5737 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5738 %4 = bitcast <64 x i1> %3 to i64 5739 ret i64 %4 5740} 5741 5742define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5743; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: 5744; VLX: # %bb.0: # %entry 5745; VLX-NEXT: kmovd %edi, %k1 5746; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} 5747; VLX-NEXT: kmovq %k0, %rax 5748; VLX-NEXT: vzeroupper 5749; VLX-NEXT: retq 5750; 5751; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: 5752; NoVLX: # %bb.0: # %entry 5753; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5754; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5755; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5756; NoVLX-NEXT: kmovw %k0, %eax 5757; NoVLX-NEXT: andl %edi, %eax 5758; NoVLX-NEXT: vzeroupper 5759; NoVLX-NEXT: retq 5760entry: 5761 %0 = bitcast <4 x i64> %__a to <16 x i16> 5762 %1 = bitcast <4 x i64> %__b to <16 x i16> 5763 %2 = icmp sgt <16 x i16> %0, %1 5764 %3 = bitcast i16 %__u to <16 x i1> 5765 %4 = and <16 x i1> %2, %3 5766 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 
29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5767 %6 = bitcast <64 x i1> %5 to i64 5768 ret i64 %6 5769} 5770 5771define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 5772; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: 5773; VLX: # %bb.0: # %entry 5774; VLX-NEXT: kmovd %edi, %k1 5775; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} 5776; VLX-NEXT: kmovq %k0, %rax 5777; VLX-NEXT: vzeroupper 5778; VLX-NEXT: retq 5779; 5780; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: 5781; NoVLX: # %bb.0: # %entry 5782; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 5783; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5784; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5785; NoVLX-NEXT: kmovw %k0, %eax 5786; NoVLX-NEXT: andl %edi, %eax 5787; NoVLX-NEXT: vzeroupper 5788; NoVLX-NEXT: retq 5789entry: 5790 %0 = bitcast <4 x i64> %__a to <16 x i16> 5791 %load = load <4 x i64>, <4 x i64>* %__b 5792 %1 = bitcast <4 x i64> %load to <16 x i16> 5793 %2 = icmp sgt <16 x i16> %0, %1 5794 %3 = bitcast i16 %__u to <16 x i1> 5795 %4 = and <16 x i1> %2, %3 5796 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5797 %6 = bitcast <64 x i1> %5 to i64 5798 ret i64 %6 5799} 5800 5801 
5802define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 5803; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: 5804; VLX: # %bb.0: # %entry 5805; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 5806; VLX-NEXT: kmovq %k0, %rax 5807; VLX-NEXT: vzeroupper 5808; VLX-NEXT: retq 5809; 5810; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: 5811; NoVLX: # %bb.0: # %entry 5812; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 5813; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 5814; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 5815; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5816; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5817; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5818; NoVLX-NEXT: kmovw %k0, %ecx 5819; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 5820; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5821; NoVLX-NEXT: kmovw %k0, %eax 5822; NoVLX-NEXT: shll $16, %eax 5823; NoVLX-NEXT: orl %ecx, %eax 5824; NoVLX-NEXT: vzeroupper 5825; NoVLX-NEXT: retq 5826entry: 5827 %0 = bitcast <8 x i64> %__a to <32 x i16> 5828 %1 = bitcast <8 x i64> %__b to <32 x i16> 5829 %2 = icmp sgt <32 x i16> %0, %1 5830 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5831 %4 = bitcast <64 x i1> %3 to i64 5832 ret i64 %4 5833} 5834 5835define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 5836; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: 5837; VLX: # %bb.0: # %entry 5838; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0 5839; VLX-NEXT: kmovq 
%k0, %rax 5840; VLX-NEXT: vzeroupper 5841; VLX-NEXT: retq 5842; 5843; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: 5844; NoVLX: # %bb.0: # %entry 5845; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 5846; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 5847; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5848; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5849; NoVLX-NEXT: kmovw %k0, %ecx 5850; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0 5851; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5852; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5853; NoVLX-NEXT: kmovw %k0, %eax 5854; NoVLX-NEXT: shll $16, %eax 5855; NoVLX-NEXT: orl %ecx, %eax 5856; NoVLX-NEXT: vzeroupper 5857; NoVLX-NEXT: retq 5858entry: 5859 %0 = bitcast <8 x i64> %__a to <32 x i16> 5860 %load = load <8 x i64>, <8 x i64>* %__b 5861 %1 = bitcast <8 x i64> %load to <32 x i16> 5862 %2 = icmp sgt <32 x i16> %0, %1 5863 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5864 %4 = bitcast <64 x i1> %3 to i64 5865 ret i64 %4 5866} 5867 5868define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 5869; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: 5870; VLX: # %bb.0: # %entry 5871; VLX-NEXT: kmovd %edi, %k1 5872; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} 5873; VLX-NEXT: kmovq %k0, %rax 5874; VLX-NEXT: vzeroupper 5875; VLX-NEXT: retq 5876; 5877; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: 5878; NoVLX: # %bb.0: # %entry 5879; NoVLX-NEXT: vpcmpgtw %ymm1, 
%ymm0, %ymm2 5880; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 5881; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 5882; NoVLX-NEXT: kmovw %k0, %eax 5883; NoVLX-NEXT: andl %edi, %eax 5884; NoVLX-NEXT: shrl $16, %edi 5885; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5886; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 5887; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5888; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5889; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5890; NoVLX-NEXT: kmovw %k0, %ecx 5891; NoVLX-NEXT: andl %edi, %ecx 5892; NoVLX-NEXT: shll $16, %ecx 5893; NoVLX-NEXT: movzwl %ax, %eax 5894; NoVLX-NEXT: orl %ecx, %eax 5895; NoVLX-NEXT: vzeroupper 5896; NoVLX-NEXT: retq 5897entry: 5898 %0 = bitcast <8 x i64> %__a to <32 x i16> 5899 %1 = bitcast <8 x i64> %__b to <32 x i16> 5900 %2 = icmp sgt <32 x i16> %0, %1 5901 %3 = bitcast i32 %__u to <32 x i1> 5902 %4 = and <32 x i1> %2, %3 5903 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5904 %6 = bitcast <64 x i1> %5 to i64 5905 ret i64 %6 5906} 5907 5908define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 5909; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: 5910; VLX: # %bb.0: # %entry 5911; VLX-NEXT: kmovd %edi, %k1 5912; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1} 5913; VLX-NEXT: kmovq %k0, %rax 5914; VLX-NEXT: vzeroupper 5915; VLX-NEXT: retq 5916; 5917; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: 5918; NoVLX: # %bb.0: # %entry 
5919; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1 5920; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 5921; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5922; NoVLX-NEXT: kmovw %k0, %eax 5923; NoVLX-NEXT: andl %edi, %eax 5924; NoVLX-NEXT: shrl $16, %edi 5925; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5926; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0 5927; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5928; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5929; NoVLX-NEXT: kmovw %k0, %ecx 5930; NoVLX-NEXT: andl %edi, %ecx 5931; NoVLX-NEXT: shll $16, %ecx 5932; NoVLX-NEXT: movzwl %ax, %eax 5933; NoVLX-NEXT: orl %ecx, %eax 5934; NoVLX-NEXT: vzeroupper 5935; NoVLX-NEXT: retq 5936entry: 5937 %0 = bitcast <8 x i64> %__a to <32 x i16> 5938 %load = load <8 x i64>, <8 x i64>* %__b 5939 %1 = bitcast <8 x i64> %load to <32 x i16> 5940 %2 = icmp sgt <32 x i16> %0, %1 5941 %3 = bitcast i32 %__u to <32 x i1> 5942 %4 = and <32 x i1> %2, %3 5943 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5944 %6 = bitcast <64 x i1> %5 to i64 5945 ret i64 %6 5946} 5947 5948 5949define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5950; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: 5951; VLX: # %bb.0: # %entry 5952; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 5953; VLX-NEXT: kmovd %k0, %eax 5954; VLX-NEXT: # kill: def $al killed $al killed $eax 5955; VLX-NEXT: retq 5956; 5957; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: 5958; NoVLX: # %bb.0: # %entry 5959; NoVLX-NEXT: # kill: def 
$xmm1 killed $xmm1 def $zmm1 5960; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5961; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 5962; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5963; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5964; NoVLX-NEXT: kmovw %k0, %eax 5965; NoVLX-NEXT: # kill: def $al killed $al killed $eax 5966; NoVLX-NEXT: vzeroupper 5967; NoVLX-NEXT: retq 5968entry: 5969 %0 = bitcast <2 x i64> %__a to <4 x i32> 5970 %1 = bitcast <2 x i64> %__b to <4 x i32> 5971 %2 = icmp sgt <4 x i32> %0, %1 5972 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5973 %4 = bitcast <8 x i1> %3 to i8 5974 ret i8 %4 5975} 5976 5977define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 5978; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: 5979; VLX: # %bb.0: # %entry 5980; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 5981; VLX-NEXT: kmovd %k0, %eax 5982; VLX-NEXT: # kill: def $al killed $al killed $eax 5983; VLX-NEXT: retq 5984; 5985; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: 5986; NoVLX: # %bb.0: # %entry 5987; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5988; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 5989; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 5990; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5991; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5992; NoVLX-NEXT: kmovw %k0, %eax 5993; NoVLX-NEXT: # kill: def $al killed $al killed $eax 5994; NoVLX-NEXT: vzeroupper 5995; NoVLX-NEXT: retq 5996entry: 5997 %0 = bitcast <2 x i64> %__a to <4 x i32> 5998 %load = load <2 x i64>, <2 x i64>* %__b 5999 %1 = bitcast <2 x i64> %load to <4 x i32> 6000 %2 = icmp sgt <4 x i32> %0, %1 6001 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6002 %4 = bitcast <8 x i1> %3 to i8 6003 ret i8 %4 6004} 6005 6006define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 
6007; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: 6008; VLX: # %bb.0: # %entry 6009; VLX-NEXT: kmovd %edi, %k1 6010; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6011; VLX-NEXT: kmovd %k0, %eax 6012; VLX-NEXT: # kill: def $al killed $al killed $eax 6013; VLX-NEXT: retq 6014; 6015; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: 6016; NoVLX: # %bb.0: # %entry 6017; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6018; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6019; NoVLX-NEXT: kmovw %edi, %k1 6020; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6021; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6022; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6023; NoVLX-NEXT: kmovw %k0, %eax 6024; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6025; NoVLX-NEXT: vzeroupper 6026; NoVLX-NEXT: retq 6027entry: 6028 %0 = bitcast <2 x i64> %__a to <4 x i32> 6029 %1 = bitcast <2 x i64> %__b to <4 x i32> 6030 %2 = icmp sgt <4 x i32> %0, %1 6031 %3 = bitcast i8 %__u to <8 x i1> 6032 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6033 %4 = and <4 x i1> %2, %extract.i 6034 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6035 %6 = bitcast <8 x i1> %5 to i8 6036 ret i8 %6 6037} 6038 6039define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6040; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: 6041; VLX: # %bb.0: # %entry 6042; VLX-NEXT: kmovd %edi, %k1 6043; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6044; VLX-NEXT: kmovd %k0, %eax 6045; VLX-NEXT: # kill: def $al killed $al killed $eax 6046; VLX-NEXT: retq 6047; 6048; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: 6049; NoVLX: # %bb.0: # %entry 6050; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6051; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6052; NoVLX-NEXT: kmovw %edi, %k1 6053; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 
6054; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6055; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6056; NoVLX-NEXT: kmovw %k0, %eax 6057; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6058; NoVLX-NEXT: vzeroupper 6059; NoVLX-NEXT: retq 6060entry: 6061 %0 = bitcast <2 x i64> %__a to <4 x i32> 6062 %load = load <2 x i64>, <2 x i64>* %__b 6063 %1 = bitcast <2 x i64> %load to <4 x i32> 6064 %2 = icmp sgt <4 x i32> %0, %1 6065 %3 = bitcast i8 %__u to <8 x i1> 6066 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6067 %4 = and <4 x i1> %2, %extract.i 6068 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6069 %6 = bitcast <8 x i1> %5 to i8 6070 ret i8 %6 6071} 6072 6073 6074define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 6075; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6076; VLX: # %bb.0: # %entry 6077; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6078; VLX-NEXT: kmovd %k0, %eax 6079; VLX-NEXT: # kill: def $al killed $al killed $eax 6080; VLX-NEXT: retq 6081; 6082; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6083; NoVLX: # %bb.0: # %entry 6084; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6085; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 6086; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6087; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6088; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6089; NoVLX-NEXT: kmovw %k0, %eax 6090; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6091; NoVLX-NEXT: vzeroupper 6092; NoVLX-NEXT: retq 6093entry: 6094 %0 = bitcast <2 x i64> %__a to <4 x i32> 6095 %load = load i32, i32* %__b 6096 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6097 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6098 %2 = icmp sgt <4 x i32> %0, %1 6099 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 
6, i32 7> 6100 %4 = bitcast <8 x i1> %3 to i8 6101 ret i8 %4 6102} 6103 6104define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 6105; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6106; VLX: # %bb.0: # %entry 6107; VLX-NEXT: kmovd %edi, %k1 6108; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6109; VLX-NEXT: kmovd %k0, %eax 6110; VLX-NEXT: # kill: def $al killed $al killed $eax 6111; VLX-NEXT: retq 6112; 6113; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6114; NoVLX: # %bb.0: # %entry 6115; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6116; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 6117; NoVLX-NEXT: kmovw %edi, %k1 6118; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6119; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6120; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6121; NoVLX-NEXT: kmovw %k0, %eax 6122; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6123; NoVLX-NEXT: vzeroupper 6124; NoVLX-NEXT: retq 6125entry: 6126 %0 = bitcast <2 x i64> %__a to <4 x i32> 6127 %load = load i32, i32* %__b 6128 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6129 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6130 %2 = icmp sgt <4 x i32> %0, %1 6131 %3 = bitcast i8 %__u to <8 x i1> 6132 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6133 %4 = and <4 x i1> %extract.i, %2 6134 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6135 %6 = bitcast <8 x i1> %5 to i8 6136 ret i8 %6 6137} 6138 6139 6140define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6141; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: 6142; VLX: # %bb.0: # %entry 6143; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6144; VLX-NEXT: kmovd %k0, %eax 6145; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6146; VLX-NEXT: retq 
6147; 6148; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: 6149; NoVLX: # %bb.0: # %entry 6150; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6151; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6152; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6153; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6154; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6155; NoVLX-NEXT: kmovw %k0, %eax 6156; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6157; NoVLX-NEXT: vzeroupper 6158; NoVLX-NEXT: retq 6159entry: 6160 %0 = bitcast <2 x i64> %__a to <4 x i32> 6161 %1 = bitcast <2 x i64> %__b to <4 x i32> 6162 %2 = icmp sgt <4 x i32> %0, %1 6163 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6164 %4 = bitcast <16 x i1> %3 to i16 6165 ret i16 %4 6166} 6167 6168define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6169; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: 6170; VLX: # %bb.0: # %entry 6171; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6172; VLX-NEXT: kmovd %k0, %eax 6173; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6174; VLX-NEXT: retq 6175; 6176; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: 6177; NoVLX: # %bb.0: # %entry 6178; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6179; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6180; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6181; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6182; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6183; NoVLX-NEXT: kmovw %k0, %eax 6184; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6185; NoVLX-NEXT: vzeroupper 6186; NoVLX-NEXT: retq 6187entry: 6188 %0 = bitcast <2 x i64> %__a to <4 x i32> 6189 %load = load <2 x i64>, <2 x i64>* %__b 6190 %1 = bitcast <2 x i64> %load to <4 x i32> 6191 %2 = icmp sgt <4 x i32> %0, %1 6192 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6193 %4 = bitcast <16 x i1> %3 to i16 6194 ret i16 %4 6195} 6196 6197define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6198; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: 6199; VLX: # %bb.0: # %entry 6200; VLX-NEXT: kmovd %edi, %k1 6201; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6202; VLX-NEXT: kmovd %k0, %eax 6203; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6204; VLX-NEXT: retq 6205; 6206; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: 6207; NoVLX: # %bb.0: # %entry 6208; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6209; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6210; NoVLX-NEXT: kmovw %edi, %k1 6211; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6212; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6213; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6214; NoVLX-NEXT: kmovw %k0, %eax 6215; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6216; NoVLX-NEXT: vzeroupper 6217; NoVLX-NEXT: retq 6218entry: 6219 %0 = bitcast <2 x i64> %__a to <4 x i32> 6220 %1 = bitcast <2 x i64> %__b to <4 x i32> 6221 %2 = icmp sgt <4 x i32> %0, %1 6222 %3 = bitcast i8 %__u to <8 x i1> 6223 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6224 %4 = and <4 x i1> %2, %extract.i 6225 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6226 %6 = bitcast <16 x i1> %5 to i16 6227 ret i16 %6 6228} 6229 6230define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6231; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: 6232; VLX: # %bb.0: # %entry 6233; VLX-NEXT: kmovd %edi, %k1 6234; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6235; VLX-NEXT: kmovd %k0, %eax 6236; VLX-NEXT: # kill: def $ax killed $ax killed 
$eax 6237; VLX-NEXT: retq 6238; 6239; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: 6240; NoVLX: # %bb.0: # %entry 6241; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6242; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6243; NoVLX-NEXT: kmovw %edi, %k1 6244; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6245; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6246; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6247; NoVLX-NEXT: kmovw %k0, %eax 6248; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6249; NoVLX-NEXT: vzeroupper 6250; NoVLX-NEXT: retq 6251entry: 6252 %0 = bitcast <2 x i64> %__a to <4 x i32> 6253 %load = load <2 x i64>, <2 x i64>* %__b 6254 %1 = bitcast <2 x i64> %load to <4 x i32> 6255 %2 = icmp sgt <4 x i32> %0, %1 6256 %3 = bitcast i8 %__u to <8 x i1> 6257 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6258 %4 = and <4 x i1> %2, %extract.i 6259 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6260 %6 = bitcast <16 x i1> %5 to i16 6261 ret i16 %6 6262} 6263 6264 6265define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 6266; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6267; VLX: # %bb.0: # %entry 6268; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6269; VLX-NEXT: kmovd %k0, %eax 6270; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6271; VLX-NEXT: retq 6272; 6273; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6274; NoVLX: # %bb.0: # %entry 6275; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6276; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 6277; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6278; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6279; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6280; NoVLX-NEXT: kmovw %k0, %eax 6281; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6282; NoVLX-NEXT: vzeroupper 6283; NoVLX-NEXT: retq 6284entry: 6285 %0 = bitcast 
<2 x i64> %__a to <4 x i32> 6286 %load = load i32, i32* %__b 6287 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6288 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6289 %2 = icmp sgt <4 x i32> %0, %1 6290 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6291 %4 = bitcast <16 x i1> %3 to i16 6292 ret i16 %4 6293} 6294 6295define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 6296; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6297; VLX: # %bb.0: # %entry 6298; VLX-NEXT: kmovd %edi, %k1 6299; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6300; VLX-NEXT: kmovd %k0, %eax 6301; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6302; VLX-NEXT: retq 6303; 6304; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6305; NoVLX: # %bb.0: # %entry 6306; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6307; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 6308; NoVLX-NEXT: kmovw %edi, %k1 6309; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6310; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6311; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6312; NoVLX-NEXT: kmovw %k0, %eax 6313; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6314; NoVLX-NEXT: vzeroupper 6315; NoVLX-NEXT: retq 6316entry: 6317 %0 = bitcast <2 x i64> %__a to <4 x i32> 6318 %load = load i32, i32* %__b 6319 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6320 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6321 %2 = icmp sgt <4 x i32> %0, %1 6322 %3 = bitcast i8 %__u to <8 x i1> 6323 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6324 %4 = and <4 x i1> %extract.i, %2 6325 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 
3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6326 %6 = bitcast <16 x i1> %5 to i16 6327 ret i16 %6 6328} 6329 6330 6331define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6332; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: 6333; VLX: # %bb.0: # %entry 6334; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6335; VLX-NEXT: kmovd %k0, %eax 6336; VLX-NEXT: retq 6337; 6338; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: 6339; NoVLX: # %bb.0: # %entry 6340; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6341; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6342; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6343; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6344; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6345; NoVLX-NEXT: kmovw %k0, %eax 6346; NoVLX-NEXT: vzeroupper 6347; NoVLX-NEXT: retq 6348entry: 6349 %0 = bitcast <2 x i64> %__a to <4 x i32> 6350 %1 = bitcast <2 x i64> %__b to <4 x i32> 6351 %2 = icmp sgt <4 x i32> %0, %1 6352 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6353 %4 = bitcast <32 x i1> %3 to i32 6354 ret i32 %4 6355} 6356 6357define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6358; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: 6359; VLX: # %bb.0: # %entry 6360; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6361; VLX-NEXT: kmovd %k0, %eax 6362; VLX-NEXT: retq 6363; 6364; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: 6365; NoVLX: # %bb.0: # %entry 6366; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6367; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6368; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6369; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6370; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6371; NoVLX-NEXT: kmovw %k0, 
%eax 6372; NoVLX-NEXT: vzeroupper 6373; NoVLX-NEXT: retq 6374entry: 6375 %0 = bitcast <2 x i64> %__a to <4 x i32> 6376 %load = load <2 x i64>, <2 x i64>* %__b 6377 %1 = bitcast <2 x i64> %load to <4 x i32> 6378 %2 = icmp sgt <4 x i32> %0, %1 6379 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6380 %4 = bitcast <32 x i1> %3 to i32 6381 ret i32 %4 6382} 6383 6384define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6385; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: 6386; VLX: # %bb.0: # %entry 6387; VLX-NEXT: kmovd %edi, %k1 6388; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6389; VLX-NEXT: kmovd %k0, %eax 6390; VLX-NEXT: retq 6391; 6392; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: 6393; NoVLX: # %bb.0: # %entry 6394; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6395; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6396; NoVLX-NEXT: kmovw %edi, %k1 6397; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6398; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6399; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6400; NoVLX-NEXT: kmovw %k0, %eax 6401; NoVLX-NEXT: vzeroupper 6402; NoVLX-NEXT: retq 6403entry: 6404 %0 = bitcast <2 x i64> %__a to <4 x i32> 6405 %1 = bitcast <2 x i64> %__b to <4 x i32> 6406 %2 = icmp sgt <4 x i32> %0, %1 6407 %3 = bitcast i8 %__u to <8 x i1> 6408 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6409 %4 = and <4 x i1> %2, %extract.i 6410 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 
6, i32 7, i32 4, i32 5, i32 6, i32 7> 6411 %6 = bitcast <32 x i1> %5 to i32 6412 ret i32 %6 6413} 6414 6415define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6416; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: 6417; VLX: # %bb.0: # %entry 6418; VLX-NEXT: kmovd %edi, %k1 6419; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6420; VLX-NEXT: kmovd %k0, %eax 6421; VLX-NEXT: retq 6422; 6423; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: 6424; NoVLX: # %bb.0: # %entry 6425; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6426; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6427; NoVLX-NEXT: kmovw %edi, %k1 6428; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6429; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6430; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6431; NoVLX-NEXT: kmovw %k0, %eax 6432; NoVLX-NEXT: vzeroupper 6433; NoVLX-NEXT: retq 6434entry: 6435 %0 = bitcast <2 x i64> %__a to <4 x i32> 6436 %load = load <2 x i64>, <2 x i64>* %__b 6437 %1 = bitcast <2 x i64> %load to <4 x i32> 6438 %2 = icmp sgt <4 x i32> %0, %1 6439 %3 = bitcast i8 %__u to <8 x i1> 6440 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6441 %4 = and <4 x i1> %2, %extract.i 6442 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6443 %6 = bitcast <32 x i1> %5 to i32 6444 ret i32 %6 6445} 6446 6447 6448define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 6449; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6450; VLX: # %bb.0: # %entry 6451; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6452; VLX-NEXT: kmovd %k0, %eax 6453; VLX-NEXT: retq 6454; 6455; NoVLX-LABEL: 
test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6456; NoVLX: # %bb.0: # %entry 6457; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6458; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 6459; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6460; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6461; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6462; NoVLX-NEXT: kmovw %k0, %eax 6463; NoVLX-NEXT: vzeroupper 6464; NoVLX-NEXT: retq 6465entry: 6466 %0 = bitcast <2 x i64> %__a to <4 x i32> 6467 %load = load i32, i32* %__b 6468 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6469 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6470 %2 = icmp sgt <4 x i32> %0, %1 6471 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6472 %4 = bitcast <32 x i1> %3 to i32 6473 ret i32 %4 6474} 6475 6476define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 6477; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6478; VLX: # %bb.0: # %entry 6479; VLX-NEXT: kmovd %edi, %k1 6480; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6481; VLX-NEXT: kmovd %k0, %eax 6482; VLX-NEXT: retq 6483; 6484; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6485; NoVLX: # %bb.0: # %entry 6486; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6487; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 6488; NoVLX-NEXT: kmovw %edi, %k1 6489; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6490; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6491; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6492; NoVLX-NEXT: kmovw %k0, %eax 6493; NoVLX-NEXT: vzeroupper 6494; NoVLX-NEXT: retq 6495entry: 6496 %0 = bitcast <2 x i64> %__a to <4 x i32> 6497 %load = load i32, i32* %__b 6498 %vec = insertelement <4 x i32> undef, i32 %load, 
i32 0 6499 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6500 %2 = icmp sgt <4 x i32> %0, %1 6501 %3 = bitcast i8 %__u to <8 x i1> 6502 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6503 %4 = and <4 x i1> %extract.i, %2 6504 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6505 %6 = bitcast <32 x i1> %5 to i32 6506 ret i32 %6 6507} 6508 6509 6510define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6511; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: 6512; VLX: # %bb.0: # %entry 6513; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6514; VLX-NEXT: kmovq %k0, %rax 6515; VLX-NEXT: retq 6516; 6517; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: 6518; NoVLX: # %bb.0: # %entry 6519; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6520; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6521; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6522; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6523; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6524; NoVLX-NEXT: kmovw %k0, %eax 6525; NoVLX-NEXT: movzwl %ax, %eax 6526; NoVLX-NEXT: vzeroupper 6527; NoVLX-NEXT: retq 6528entry: 6529 %0 = bitcast <2 x i64> %__a to <4 x i32> 6530 %1 = bitcast <2 x i64> %__b to <4 x i32> 6531 %2 = icmp sgt <4 x i32> %0, %1 6532 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6533 %4 = bitcast <64 x i1> %3 to i64 6534 ret i64 %4 6535} 6536 6537define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6538; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: 6539; VLX: # %bb.0: # %entry 6540; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6541; VLX-NEXT: kmovq %k0, %rax 6542; VLX-NEXT: retq 6543; 6544; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: 6545; NoVLX: # %bb.0: # %entry 6546; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6547; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6548; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6549; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6550; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6551; NoVLX-NEXT: kmovw %k0, %eax 6552; NoVLX-NEXT: movzwl %ax, %eax 6553; NoVLX-NEXT: vzeroupper 6554; NoVLX-NEXT: retq 6555entry: 6556 %0 = bitcast <2 x i64> %__a to <4 x i32> 6557 %load = load <2 x i64>, <2 x i64>* %__b 6558 %1 = bitcast <2 x i64> %load to <4 x i32> 6559 %2 = icmp sgt <4 x i32> %0, %1 6560 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6561 %4 = bitcast <64 x i1> %3 to i64 6562 ret i64 %4 6563} 6564 6565define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6566; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: 6567; VLX: # %bb.0: # %entry 6568; VLX-NEXT: kmovd %edi, %k1 6569; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6570; VLX-NEXT: kmovq %k0, %rax 6571; VLX-NEXT: 
retq 6572; 6573; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: 6574; NoVLX: # %bb.0: # %entry 6575; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6576; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6577; NoVLX-NEXT: kmovw %edi, %k1 6578; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6579; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6580; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6581; NoVLX-NEXT: kmovw %k0, %eax 6582; NoVLX-NEXT: movzwl %ax, %eax 6583; NoVLX-NEXT: vzeroupper 6584; NoVLX-NEXT: retq 6585entry: 6586 %0 = bitcast <2 x i64> %__a to <4 x i32> 6587 %1 = bitcast <2 x i64> %__b to <4 x i32> 6588 %2 = icmp sgt <4 x i32> %0, %1 6589 %3 = bitcast i8 %__u to <8 x i1> 6590 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6591 %4 = and <4 x i1> %2, %extract.i 6592 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6593 %6 = bitcast <64 x i1> %5 to i64 6594 ret i64 %6 6595} 6596 6597define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 6598; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: 6599; VLX: # %bb.0: # %entry 6600; VLX-NEXT: kmovd %edi, %k1 6601; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6602; VLX-NEXT: kmovq %k0, %rax 6603; VLX-NEXT: retq 6604; 6605; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: 6606; NoVLX: # %bb.0: # %entry 6607; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6608; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6609; NoVLX-NEXT: kmovw %edi, 
%k1 6610; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6611; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6612; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6613; NoVLX-NEXT: kmovw %k0, %eax 6614; NoVLX-NEXT: movzwl %ax, %eax 6615; NoVLX-NEXT: vzeroupper 6616; NoVLX-NEXT: retq 6617entry: 6618 %0 = bitcast <2 x i64> %__a to <4 x i32> 6619 %load = load <2 x i64>, <2 x i64>* %__b 6620 %1 = bitcast <2 x i64> %load to <4 x i32> 6621 %2 = icmp sgt <4 x i32> %0, %1 6622 %3 = bitcast i8 %__u to <8 x i1> 6623 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6624 %4 = and <4 x i1> %2, %extract.i 6625 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6626 %6 = bitcast <64 x i1> %5 to i64 6627 ret i64 %6 6628} 6629 6630 6631define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 6632; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6633; VLX: # %bb.0: # %entry 6634; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6635; VLX-NEXT: kmovq %k0, %rax 6636; VLX-NEXT: retq 6637; 6638; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6639; NoVLX: # %bb.0: # %entry 6640; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6641; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 6642; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6643; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6644; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6645; NoVLX-NEXT: kmovw %k0, %eax 6646; NoVLX-NEXT: movzwl %ax, %eax 6647; NoVLX-NEXT: vzeroupper 6648; NoVLX-NEXT: retq 6649entry: 6650 %0 = bitcast <2 x i64> %__a 
to <4 x i32> 6651 %load = load i32, i32* %__b 6652 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6653 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6654 %2 = icmp sgt <4 x i32> %0, %1 6655 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6656 %4 = bitcast <64 x i1> %3 to i64 6657 ret i64 %4 6658} 6659 6660define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 6661; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6662; VLX: # %bb.0: # %entry 6663; VLX-NEXT: kmovd %edi, %k1 6664; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6665; VLX-NEXT: kmovq %k0, %rax 6666; VLX-NEXT: retq 6667; 6668; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6669; NoVLX: # %bb.0: # %entry 6670; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6671; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 6672; NoVLX-NEXT: kmovw %edi, %k1 6673; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6674; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6675; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6676; NoVLX-NEXT: kmovw %k0, %eax 6677; NoVLX-NEXT: movzwl %ax, %eax 6678; NoVLX-NEXT: vzeroupper 6679; NoVLX-NEXT: retq 6680entry: 6681 %0 = bitcast <2 x i64> %__a to <4 x i32> 6682 %load = load i32, i32* %__b 6683 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6684 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6685 %2 = icmp sgt <4 x i32> %0, %1 6686 %3 = bitcast i8 %__u to <8 x 
i1> 6687 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6688 %4 = and <4 x i1> %extract.i, %2 6689 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6690 %6 = bitcast <64 x i1> %5 to i64 6691 ret i64 %6 6692} 6693 6694 6695define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6696; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: 6697; VLX: # %bb.0: # %entry 6698; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 6699; VLX-NEXT: kmovd %k0, %eax 6700; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6701; VLX-NEXT: vzeroupper 6702; VLX-NEXT: retq 6703; 6704; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: 6705; NoVLX: # %bb.0: # %entry 6706; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6707; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6708; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6709; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6710; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6711; NoVLX-NEXT: kmovw %k0, %eax 6712; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6713; NoVLX-NEXT: vzeroupper 6714; NoVLX-NEXT: retq 6715entry: 6716 %0 = bitcast <4 x i64> %__a to <8 x i32> 6717 %1 = bitcast <4 x i64> %__b to <8 x i32> 6718 %2 = icmp sgt <8 x i32> %0, %1 6719 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6720 %4 = bitcast <16 x i1> %3 to i16 6721 ret i16 %4 6722} 6723 6724define zeroext i16 
@test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 6725; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: 6726; VLX: # %bb.0: # %entry 6727; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 6728; VLX-NEXT: kmovd %k0, %eax 6729; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6730; VLX-NEXT: vzeroupper 6731; VLX-NEXT: retq 6732; 6733; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: 6734; NoVLX: # %bb.0: # %entry 6735; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6736; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 6737; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6738; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6739; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6740; NoVLX-NEXT: kmovw %k0, %eax 6741; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6742; NoVLX-NEXT: vzeroupper 6743; NoVLX-NEXT: retq 6744entry: 6745 %0 = bitcast <4 x i64> %__a to <8 x i32> 6746 %load = load <4 x i64>, <4 x i64>* %__b 6747 %1 = bitcast <4 x i64> %load to <8 x i32> 6748 %2 = icmp sgt <8 x i32> %0, %1 6749 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6750 %4 = bitcast <16 x i1> %3 to i16 6751 ret i16 %4 6752} 6753 6754define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6755; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: 6756; VLX: # %bb.0: # %entry 6757; VLX-NEXT: kmovd %edi, %k1 6758; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 6759; VLX-NEXT: kmovd %k0, %eax 6760; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6761; VLX-NEXT: vzeroupper 6762; VLX-NEXT: retq 6763; 6764; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: 6765; NoVLX: # %bb.0: # %entry 6766; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6767; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6768; NoVLX-NEXT: kmovw %edi, %k1 6769; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6770; 
NoVLX-NEXT: kshiftlw $8, %k0, %k0 6771; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6772; NoVLX-NEXT: kmovw %k0, %eax 6773; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6774; NoVLX-NEXT: vzeroupper 6775; NoVLX-NEXT: retq 6776entry: 6777 %0 = bitcast <4 x i64> %__a to <8 x i32> 6778 %1 = bitcast <4 x i64> %__b to <8 x i32> 6779 %2 = icmp sgt <8 x i32> %0, %1 6780 %3 = bitcast i8 %__u to <8 x i1> 6781 %4 = and <8 x i1> %2, %3 6782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6783 %6 = bitcast <16 x i1> %5 to i16 6784 ret i16 %6 6785} 6786 6787define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 6788; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: 6789; VLX: # %bb.0: # %entry 6790; VLX-NEXT: kmovd %edi, %k1 6791; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 6792; VLX-NEXT: kmovd %k0, %eax 6793; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6794; VLX-NEXT: vzeroupper 6795; VLX-NEXT: retq 6796; 6797; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: 6798; NoVLX: # %bb.0: # %entry 6799; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6800; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 6801; NoVLX-NEXT: kmovw %edi, %k1 6802; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6803; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6804; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6805; NoVLX-NEXT: kmovw %k0, %eax 6806; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6807; NoVLX-NEXT: vzeroupper 6808; NoVLX-NEXT: retq 6809entry: 6810 %0 = bitcast <4 x i64> %__a to <8 x i32> 6811 %load = load <4 x i64>, <4 x i64>* %__b 6812 %1 = bitcast <4 x i64> %load to <8 x i32> 6813 %2 = icmp sgt <8 x i32> %0, %1 6814 %3 = bitcast i8 %__u to <8 x i1> 6815 %4 = and <8 x i1> %2, %3 6816 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6817 %6 = bitcast <16 x i1> %5 to i16 6818 ret i16 %6 6819} 6820 6821 6822define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 6823; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6824; VLX: # %bb.0: # %entry 6825; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 6826; VLX-NEXT: kmovd %k0, %eax 6827; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6828; VLX-NEXT: vzeroupper 6829; VLX-NEXT: retq 6830; 6831; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6832; NoVLX: # %bb.0: # %entry 6833; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6834; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 6835; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6836; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6837; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6838; NoVLX-NEXT: kmovw %k0, %eax 6839; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6840; NoVLX-NEXT: vzeroupper 6841; NoVLX-NEXT: retq 6842entry: 6843 %0 = bitcast <4 x i64> %__a to <8 x i32> 6844 %load = load i32, i32* %__b 6845 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6846 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6847 %2 = icmp sgt <8 x i32> %0, %1 6848 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6849 %4 = bitcast <16 x i1> %3 to i16 6850 ret i16 %4 6851} 6852 6853define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 6854; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6855; VLX: # %bb.0: # %entry 6856; VLX-NEXT: kmovd %edi, %k1 6857; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 6858; VLX-NEXT: kmovd %k0, %eax 6859; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6860; VLX-NEXT: vzeroupper 6861; 
VLX-NEXT: retq 6862; 6863; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6864; NoVLX: # %bb.0: # %entry 6865; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6866; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 6867; NoVLX-NEXT: kmovw %edi, %k1 6868; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6869; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6870; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6871; NoVLX-NEXT: kmovw %k0, %eax 6872; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6873; NoVLX-NEXT: vzeroupper 6874; NoVLX-NEXT: retq 6875entry: 6876 %0 = bitcast <4 x i64> %__a to <8 x i32> 6877 %load = load i32, i32* %__b 6878 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6879 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6880 %2 = icmp sgt <8 x i32> %0, %1 6881 %3 = bitcast i8 %__u to <8 x i1> 6882 %4 = and <8 x i1> %3, %2 6883 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6884 %6 = bitcast <16 x i1> %5 to i16 6885 ret i16 %6 6886} 6887 6888 6889define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6890; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: 6891; VLX: # %bb.0: # %entry 6892; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 6893; VLX-NEXT: kmovd %k0, %eax 6894; VLX-NEXT: vzeroupper 6895; VLX-NEXT: retq 6896; 6897; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: 6898; NoVLX: # %bb.0: # %entry 6899; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6900; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6901; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6902; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6903; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6904; NoVLX-NEXT: kmovw %k0, %eax 6905; NoVLX-NEXT: vzeroupper 6906; NoVLX-NEXT: retq 6907entry: 6908 %0 = bitcast <4 x i64> %__a to <8 x i32> 6909 %1 = bitcast <4 x i64> %__b to <8 x i32> 
6910 %2 = icmp sgt <8 x i32> %0, %1 6911 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6912 %4 = bitcast <32 x i1> %3 to i32 6913 ret i32 %4 6914} 6915 6916define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 6917; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: 6918; VLX: # %bb.0: # %entry 6919; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 6920; VLX-NEXT: kmovd %k0, %eax 6921; VLX-NEXT: vzeroupper 6922; VLX-NEXT: retq 6923; 6924; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: 6925; NoVLX: # %bb.0: # %entry 6926; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6927; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 6928; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6929; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6930; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6931; NoVLX-NEXT: kmovw %k0, %eax 6932; NoVLX-NEXT: vzeroupper 6933; NoVLX-NEXT: retq 6934entry: 6935 %0 = bitcast <4 x i64> %__a to <8 x i32> 6936 %load = load <4 x i64>, <4 x i64>* %__b 6937 %1 = bitcast <4 x i64> %load to <8 x i32> 6938 %2 = icmp sgt <8 x i32> %0, %1 6939 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6940 %4 = bitcast <32 x i1> %3 to i32 6941 ret i32 %4 6942} 6943 6944define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6945; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: 6946; VLX: # %bb.0: # %entry 6947; VLX-NEXT: kmovd %edi, %k1 6948; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 
{%k1} 6949; VLX-NEXT: kmovd %k0, %eax 6950; VLX-NEXT: vzeroupper 6951; VLX-NEXT: retq 6952; 6953; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: 6954; NoVLX: # %bb.0: # %entry 6955; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6956; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6957; NoVLX-NEXT: kmovw %edi, %k1 6958; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6959; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6960; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6961; NoVLX-NEXT: kmovw %k0, %eax 6962; NoVLX-NEXT: vzeroupper 6963; NoVLX-NEXT: retq 6964entry: 6965 %0 = bitcast <4 x i64> %__a to <8 x i32> 6966 %1 = bitcast <4 x i64> %__b to <8 x i32> 6967 %2 = icmp sgt <8 x i32> %0, %1 6968 %3 = bitcast i8 %__u to <8 x i1> 6969 %4 = and <8 x i1> %2, %3 6970 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6971 %6 = bitcast <32 x i1> %5 to i32 6972 ret i32 %6 6973} 6974 6975define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 6976; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: 6977; VLX: # %bb.0: # %entry 6978; VLX-NEXT: kmovd %edi, %k1 6979; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 6980; VLX-NEXT: kmovd %k0, %eax 6981; VLX-NEXT: vzeroupper 6982; VLX-NEXT: retq 6983; 6984; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: 6985; NoVLX: # %bb.0: # %entry 6986; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6987; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 6988; NoVLX-NEXT: kmovw %edi, %k1 6989; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6990; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6991; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6992; NoVLX-NEXT: kmovw %k0, %eax 6993; NoVLX-NEXT: vzeroupper 6994; NoVLX-NEXT: retq 6995entry: 6996 %0 = 
bitcast <4 x i64> %__a to <8 x i32> 6997 %load = load <4 x i64>, <4 x i64>* %__b 6998 %1 = bitcast <4 x i64> %load to <8 x i32> 6999 %2 = icmp sgt <8 x i32> %0, %1 7000 %3 = bitcast i8 %__u to <8 x i1> 7001 %4 = and <8 x i1> %2, %3 7002 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7003 %6 = bitcast <32 x i1> %5 to i32 7004 ret i32 %6 7005} 7006 7007 7008define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 7009; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 7010; VLX: # %bb.0: # %entry 7011; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 7012; VLX-NEXT: kmovd %k0, %eax 7013; VLX-NEXT: vzeroupper 7014; VLX-NEXT: retq 7015; 7016; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 7017; NoVLX: # %bb.0: # %entry 7018; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7019; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 7020; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7021; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7022; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7023; NoVLX-NEXT: kmovw %k0, %eax 7024; NoVLX-NEXT: vzeroupper 7025; NoVLX-NEXT: retq 7026entry: 7027 %0 = bitcast <4 x i64> %__a to <8 x i32> 7028 %load = load i32, i32* %__b 7029 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7030 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7031 %2 = icmp sgt <8 x i32> %0, %1 7032 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7033 %4 = bitcast <32 
x i1> %3 to i32 7034 ret i32 %4 7035} 7036 7037define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 7038; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 7039; VLX: # %bb.0: # %entry 7040; VLX-NEXT: kmovd %edi, %k1 7041; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 7042; VLX-NEXT: kmovd %k0, %eax 7043; VLX-NEXT: vzeroupper 7044; VLX-NEXT: retq 7045; 7046; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 7047; NoVLX: # %bb.0: # %entry 7048; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7049; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 7050; NoVLX-NEXT: kmovw %edi, %k1 7051; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7052; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7053; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7054; NoVLX-NEXT: kmovw %k0, %eax 7055; NoVLX-NEXT: vzeroupper 7056; NoVLX-NEXT: retq 7057entry: 7058 %0 = bitcast <4 x i64> %__a to <8 x i32> 7059 %load = load i32, i32* %__b 7060 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7061 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7062 %2 = icmp sgt <8 x i32> %0, %1 7063 %3 = bitcast i8 %__u to <8 x i1> 7064 %4 = and <8 x i1> %3, %2 7065 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7066 %6 = bitcast <32 x i1> %5 to i32 7067 ret i32 %6 7068} 7069 7070 7071define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 7072; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: 7073; VLX: # %bb.0: # %entry 7074; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 7075; VLX-NEXT: kmovq %k0, %rax 7076; VLX-NEXT: vzeroupper 7077; VLX-NEXT: retq 7078; 7079; NoVLX-LABEL: 
test_vpcmpsgtd_v8i1_v64i1_mask: 7080; NoVLX: # %bb.0: # %entry 7081; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 7082; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7083; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7084; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7085; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7086; NoVLX-NEXT: kmovw %k0, %eax 7087; NoVLX-NEXT: movzwl %ax, %eax 7088; NoVLX-NEXT: vzeroupper 7089; NoVLX-NEXT: retq 7090entry: 7091 %0 = bitcast <4 x i64> %__a to <8 x i32> 7092 %1 = bitcast <4 x i64> %__b to <8 x i32> 7093 %2 = icmp sgt <8 x i32> %0, %1 7094 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7095 %4 = bitcast <64 x i1> %3 to i64 7096 ret i64 %4 7097} 7098 7099define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 7100; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: 7101; VLX: # %bb.0: # %entry 7102; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 7103; VLX-NEXT: kmovq %k0, %rax 7104; VLX-NEXT: vzeroupper 7105; VLX-NEXT: retq 7106; 7107; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: 7108; NoVLX: # %bb.0: # %entry 7109; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7110; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 7111; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7112; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7113; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7114; NoVLX-NEXT: kmovw %k0, %eax 7115; NoVLX-NEXT: movzwl %ax, %eax 7116; NoVLX-NEXT: vzeroupper 7117; NoVLX-NEXT: retq 7118entry: 7119 %0 = bitcast <4 x i64> %__a to 
<8 x i32> 7120 %load = load <4 x i64>, <4 x i64>* %__b 7121 %1 = bitcast <4 x i64> %load to <8 x i32> 7122 %2 = icmp sgt <8 x i32> %0, %1 7123 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7124 %4 = bitcast <64 x i1> %3 to i64 7125 ret i64 %4 7126} 7127 7128define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 7129; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: 7130; VLX: # %bb.0: # %entry 7131; VLX-NEXT: kmovd %edi, %k1 7132; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 7133; VLX-NEXT: kmovq %k0, %rax 7134; VLX-NEXT: vzeroupper 7135; VLX-NEXT: retq 7136; 7137; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: 7138; NoVLX: # %bb.0: # %entry 7139; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 7140; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7141; NoVLX-NEXT: kmovw %edi, %k1 7142; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7143; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7144; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7145; NoVLX-NEXT: kmovw %k0, %eax 7146; NoVLX-NEXT: movzwl %ax, %eax 7147; NoVLX-NEXT: vzeroupper 7148; NoVLX-NEXT: retq 7149entry: 7150 %0 = bitcast <4 x i64> %__a to <8 x i32> 7151 %1 = bitcast <4 x i64> %__b to <8 x i32> 7152 %2 = icmp sgt <8 x i32> %0, %1 7153 %3 = bitcast i8 %__u to <8 x i1> 7154 %4 = and <8 x i1> %2, %3 7155 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7156 %6 = bitcast <64 x i1> %5 to i64 7157 ret i64 %6 7158} 7159 7160define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 7161; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: 7162; VLX: # %bb.0: # %entry 7163; VLX-NEXT: kmovd %edi, %k1 7164; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 7165; VLX-NEXT: kmovq %k0, %rax 7166; VLX-NEXT: vzeroupper 7167; VLX-NEXT: retq 7168; 7169; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: 7170; NoVLX: # %bb.0: # %entry 7171; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7172; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 7173; NoVLX-NEXT: kmovw %edi, %k1 7174; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7175; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7176; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7177; NoVLX-NEXT: kmovw %k0, %eax 7178; NoVLX-NEXT: movzwl %ax, %eax 7179; NoVLX-NEXT: vzeroupper 7180; NoVLX-NEXT: retq 7181entry: 7182 %0 = bitcast <4 x i64> %__a to <8 x i32> 7183 %load = load <4 x i64>, <4 x i64>* %__b 7184 %1 = bitcast <4 x i64> %load to <8 x i32> 7185 %2 = icmp sgt <8 x i32> %0, %1 7186 %3 = bitcast i8 %__u to <8 x i1> 7187 %4 = and <8 x i1> %2, %3 7188 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7189 %6 = bitcast <64 x i1> %5 to i64 7190 ret i64 %6 7191} 7192 7193 7194define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 7195; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7196; VLX: # %bb.0: # %entry 7197; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 7198; VLX-NEXT: kmovq %k0, %rax 7199; VLX-NEXT: vzeroupper 7200; VLX-NEXT: retq 7201; 7202; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7203; NoVLX: # %bb.0: # %entry 7204; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7205; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 7206; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7207; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7208; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7209; NoVLX-NEXT: kmovw %k0, %eax 7210; NoVLX-NEXT: movzwl %ax, %eax 7211; NoVLX-NEXT: vzeroupper 7212; NoVLX-NEXT: retq 7213entry: 7214 %0 = bitcast <4 x i64> %__a to <8 x i32> 7215 %load = load i32, i32* %__b 7216 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7217 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7218 %2 = icmp sgt <8 x i32> %0, %1 7219 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7220 %4 = bitcast <64 x i1> %3 to i64 7221 ret i64 %4 7222} 7223 7224define zeroext i64 
@test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 7225; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7226; VLX: # %bb.0: # %entry 7227; VLX-NEXT: kmovd %edi, %k1 7228; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 7229; VLX-NEXT: kmovq %k0, %rax 7230; VLX-NEXT: vzeroupper 7231; VLX-NEXT: retq 7232; 7233; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7234; NoVLX: # %bb.0: # %entry 7235; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7236; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 7237; NoVLX-NEXT: kmovw %edi, %k1 7238; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7239; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7240; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7241; NoVLX-NEXT: kmovw %k0, %eax 7242; NoVLX-NEXT: movzwl %ax, %eax 7243; NoVLX-NEXT: vzeroupper 7244; NoVLX-NEXT: retq 7245entry: 7246 %0 = bitcast <4 x i64> %__a to <8 x i32> 7247 %load = load i32, i32* %__b 7248 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7249 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7250 %2 = icmp sgt <8 x i32> %0, %1 7251 %3 = bitcast i8 %__u to <8 x i1> 7252 %4 = and <8 x i1> %3, %2 7253 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7254 %6 = bitcast <64 x i1> %5 to i64 7255 ret i64 %6 7256} 7257 7258 7259define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7260; VLX-LABEL: 
test_vpcmpsgtd_v16i1_v32i1_mask: 7261; VLX: # %bb.0: # %entry 7262; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7263; VLX-NEXT: kmovd %k0, %eax 7264; VLX-NEXT: vzeroupper 7265; VLX-NEXT: retq 7266; 7267; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: 7268; NoVLX: # %bb.0: # %entry 7269; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7270; NoVLX-NEXT: kmovw %k0, %eax 7271; NoVLX-NEXT: vzeroupper 7272; NoVLX-NEXT: retq 7273entry: 7274 %0 = bitcast <8 x i64> %__a to <16 x i32> 7275 %1 = bitcast <8 x i64> %__b to <16 x i32> 7276 %2 = icmp sgt <16 x i32> %0, %1 7277 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7278 %4 = bitcast <32 x i1> %3 to i32 7279 ret i32 %4 7280} 7281 7282define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 7283; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: 7284; VLX: # %bb.0: # %entry 7285; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7286; VLX-NEXT: kmovd %k0, %eax 7287; VLX-NEXT: vzeroupper 7288; VLX-NEXT: retq 7289; 7290; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: 7291; NoVLX: # %bb.0: # %entry 7292; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7293; NoVLX-NEXT: kmovw %k0, %eax 7294; NoVLX-NEXT: vzeroupper 7295; NoVLX-NEXT: retq 7296entry: 7297 %0 = bitcast <8 x i64> %__a to <16 x i32> 7298 %load = load <8 x i64>, <8 x i64>* %__b 7299 %1 = bitcast <8 x i64> %load to <16 x i32> 7300 %2 = icmp sgt <16 x i32> %0, %1 7301 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7302 %4 
= bitcast <32 x i1> %3 to i32 7303 ret i32 %4 7304} 7305 7306define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7307; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: 7308; VLX: # %bb.0: # %entry 7309; VLX-NEXT: kmovd %edi, %k1 7310; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7311; VLX-NEXT: kmovd %k0, %eax 7312; VLX-NEXT: vzeroupper 7313; VLX-NEXT: retq 7314; 7315; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: 7316; NoVLX: # %bb.0: # %entry 7317; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7318; NoVLX-NEXT: kmovw %k0, %eax 7319; NoVLX-NEXT: andl %edi, %eax 7320; NoVLX-NEXT: vzeroupper 7321; NoVLX-NEXT: retq 7322entry: 7323 %0 = bitcast <8 x i64> %__a to <16 x i32> 7324 %1 = bitcast <8 x i64> %__b to <16 x i32> 7325 %2 = icmp sgt <16 x i32> %0, %1 7326 %3 = bitcast i16 %__u to <16 x i1> 7327 %4 = and <16 x i1> %2, %3 7328 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7329 %6 = bitcast <32 x i1> %5 to i32 7330 ret i32 %6 7331} 7332 7333define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 7334; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: 7335; VLX: # %bb.0: # %entry 7336; VLX-NEXT: kmovd %edi, %k1 7337; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} 7338; VLX-NEXT: kmovd %k0, %eax 7339; VLX-NEXT: vzeroupper 7340; VLX-NEXT: retq 7341; 7342; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: 7343; NoVLX: # %bb.0: # %entry 7344; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 7345; NoVLX-NEXT: kmovw %k0, %eax 7346; NoVLX-NEXT: andl %edi, %eax 7347; NoVLX-NEXT: vzeroupper 7348; NoVLX-NEXT: retq 7349entry: 7350 %0 = bitcast <8 x i64> 
%__a to <16 x i32> 7351 %load = load <8 x i64>, <8 x i64>* %__b 7352 %1 = bitcast <8 x i64> %load to <16 x i32> 7353 %2 = icmp sgt <16 x i32> %0, %1 7354 %3 = bitcast i16 %__u to <16 x i1> 7355 %4 = and <16 x i1> %2, %3 7356 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7357 %6 = bitcast <32 x i1> %5 to i32 7358 ret i32 %6 7359} 7360 7361 7362define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 7363; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7364; VLX: # %bb.0: # %entry 7365; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7366; VLX-NEXT: kmovd %k0, %eax 7367; VLX-NEXT: vzeroupper 7368; VLX-NEXT: retq 7369; 7370; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7371; NoVLX: # %bb.0: # %entry 7372; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7373; NoVLX-NEXT: kmovw %k0, %eax 7374; NoVLX-NEXT: vzeroupper 7375; NoVLX-NEXT: retq 7376entry: 7377 %0 = bitcast <8 x i64> %__a to <16 x i32> 7378 %load = load i32, i32* %__b 7379 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7380 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7381 %2 = icmp sgt <16 x i32> %0, %1 7382 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7383 %4 = bitcast <32 x i1> %3 to i32 7384 ret i32 %4 7385} 7386 7387define zeroext i32 
@test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 7388; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7389; VLX: # %bb.0: # %entry 7390; VLX-NEXT: kmovd %edi, %k1 7391; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 7392; VLX-NEXT: kmovd %k0, %eax 7393; VLX-NEXT: vzeroupper 7394; VLX-NEXT: retq 7395; 7396; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7397; NoVLX: # %bb.0: # %entry 7398; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 7399; NoVLX-NEXT: kmovw %k0, %eax 7400; NoVLX-NEXT: andl %edi, %eax 7401; NoVLX-NEXT: vzeroupper 7402; NoVLX-NEXT: retq 7403entry: 7404 %0 = bitcast <8 x i64> %__a to <16 x i32> 7405 %load = load i32, i32* %__b 7406 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7407 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7408 %2 = icmp sgt <16 x i32> %0, %1 7409 %3 = bitcast i16 %__u to <16 x i1> 7410 %4 = and <16 x i1> %3, %2 7411 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7412 %6 = bitcast <32 x i1> %5 to i32 7413 ret i32 %6 7414} 7415 7416 7417define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7418; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: 7419; VLX: # %bb.0: # %entry 7420; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7421; VLX-NEXT: kmovq %k0, %rax 7422; VLX-NEXT: vzeroupper 7423; VLX-NEXT: retq 7424; 7425; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: 7426; NoVLX: # %bb.0: # %entry 7427; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7428; NoVLX-NEXT: kmovw %k0, %eax 7429; NoVLX-NEXT: movzwl %ax, %eax 
7430; NoVLX-NEXT: vzeroupper 7431; NoVLX-NEXT: retq 7432entry: 7433 %0 = bitcast <8 x i64> %__a to <16 x i32> 7434 %1 = bitcast <8 x i64> %__b to <16 x i32> 7435 %2 = icmp sgt <16 x i32> %0, %1 7436 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7437 %4 = bitcast <64 x i1> %3 to i64 7438 ret i64 %4 7439} 7440 7441define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 7442; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: 7443; VLX: # %bb.0: # %entry 7444; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7445; VLX-NEXT: kmovq %k0, %rax 7446; VLX-NEXT: vzeroupper 7447; VLX-NEXT: retq 7448; 7449; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: 7450; NoVLX: # %bb.0: # %entry 7451; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7452; NoVLX-NEXT: kmovw %k0, %eax 7453; NoVLX-NEXT: movzwl %ax, %eax 7454; NoVLX-NEXT: vzeroupper 7455; NoVLX-NEXT: retq 7456entry: 7457 %0 = bitcast <8 x i64> %__a to <16 x i32> 7458 %load = load <8 x i64>, <8 x i64>* %__b 7459 %1 = bitcast <8 x i64> %load to <16 x i32> 7460 %2 = icmp sgt <16 x i32> %0, %1 7461 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 
23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7462 %4 = bitcast <64 x i1> %3 to i64 7463 ret i64 %4 7464} 7465 7466define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7467; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: 7468; VLX: # %bb.0: # %entry 7469; VLX-NEXT: kmovd %edi, %k1 7470; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7471; VLX-NEXT: kmovq %k0, %rax 7472; VLX-NEXT: vzeroupper 7473; VLX-NEXT: retq 7474; 7475; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: 7476; NoVLX: # %bb.0: # %entry 7477; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7478; NoVLX-NEXT: kmovw %k0, %eax 7479; NoVLX-NEXT: andl %edi, %eax 7480; NoVLX-NEXT: vzeroupper 7481; NoVLX-NEXT: retq 7482entry: 7483 %0 = bitcast <8 x i64> %__a to <16 x i32> 7484 %1 = bitcast <8 x i64> %__b to <16 x i32> 7485 %2 = icmp sgt <16 x i32> %0, %1 7486 %3 = bitcast i16 %__u to <16 x i1> 7487 %4 = and <16 x i1> %2, %3 7488 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7489 %6 = bitcast <64 x i1> %5 to i64 7490 ret i64 %6 7491} 7492 7493define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 7494; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: 7495; VLX: # %bb.0: # %entry 7496; VLX-NEXT: kmovd %edi, %k1 7497; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} 7498; VLX-NEXT: kmovq %k0, %rax 7499; VLX-NEXT: vzeroupper 7500; VLX-NEXT: retq 7501; 7502; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: 7503; NoVLX: # %bb.0: # %entry 7504; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 7505; NoVLX-NEXT: kmovw %k0, %eax 7506; NoVLX-NEXT: andl %edi, %eax 7507; NoVLX-NEXT: vzeroupper 7508; NoVLX-NEXT: retq 7509entry: 7510 %0 = bitcast <8 x i64> %__a to <16 x i32> 7511 %load = load <8 x i64>, <8 x i64>* %__b 7512 %1 = bitcast <8 x i64> %load to <16 x i32> 7513 %2 = icmp sgt <16 x i32> %0, %1 7514 %3 = bitcast i16 %__u to <16 x i1> 7515 %4 = and <16 x i1> %2, %3 7516 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7517 %6 = bitcast <64 x i1> %5 to i64 7518 ret i64 %6 7519} 7520 7521 7522define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 7523; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7524; VLX: # %bb.0: # %entry 7525; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7526; VLX-NEXT: kmovq %k0, %rax 7527; VLX-NEXT: vzeroupper 7528; VLX-NEXT: retq 7529; 7530; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7531; NoVLX: # %bb.0: # %entry 7532; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7533; NoVLX-NEXT: kmovw %k0, %eax 7534; NoVLX-NEXT: movzwl %ax, %eax 7535; NoVLX-NEXT: vzeroupper 7536; NoVLX-NEXT: retq 7537entry: 7538 %0 = bitcast <8 x i64> %__a to <16 x i32> 7539 %load = load i32, i32* %__b 7540 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7541 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0, i32 0, i32 0> 7542 %2 = icmp sgt <16 x i32> %0, %1 7543 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7544 %4 = bitcast <64 x i1> %3 to i64 7545 ret i64 %4 7546} 7547 7548define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 7549; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7550; VLX: # %bb.0: # %entry 7551; VLX-NEXT: kmovd %edi, %k1 7552; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 7553; VLX-NEXT: kmovq %k0, %rax 7554; VLX-NEXT: vzeroupper 7555; VLX-NEXT: retq 7556; 7557; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7558; NoVLX: # %bb.0: # %entry 7559; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 7560; NoVLX-NEXT: kmovw %k0, %eax 7561; NoVLX-NEXT: andl %edi, %eax 7562; NoVLX-NEXT: vzeroupper 7563; NoVLX-NEXT: retq 7564entry: 7565 %0 = bitcast <8 x i64> %__a to <16 x i32> 7566 %load = load i32, i32* %__b 7567 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7568 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7569 %2 = icmp sgt <16 x i32> %0, %1 7570 %3 = bitcast i16 %__u to <16 x i1> 7571 %4 = and <16 x i1> %3, %2 7572 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 
25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7573 %6 = bitcast <64 x i1> %5 to i64 7574 ret i64 %6 7575} 7576 7577 7578define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7579; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: 7580; VLX: # %bb.0: # %entry 7581; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 7582; VLX-NEXT: kmovb %k0, %eax 7583; VLX-NEXT: retq 7584; 7585; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: 7586; NoVLX: # %bb.0: # %entry 7587; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7588; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7589; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7590; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7591; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7592; NoVLX-NEXT: kmovw %k0, %eax 7593; NoVLX-NEXT: andl $3, %eax 7594; NoVLX-NEXT: vzeroupper 7595; NoVLX-NEXT: retq 7596entry: 7597 %0 = bitcast <2 x i64> %__a to <2 x i64> 7598 %1 = bitcast <2 x i64> %__b to <2 x i64> 7599 %2 = icmp sgt <2 x i64> %0, %1 7600 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7601 %4 = bitcast <4 x i1> %3 to i4 7602 ret i4 %4 7603} 7604 7605define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 7606; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: 7607; VLX: # %bb.0: # %entry 7608; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 7609; VLX-NEXT: kmovb %k0, %eax 7610; VLX-NEXT: retq 7611; 7612; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: 7613; NoVLX: # %bb.0: # %entry 7614; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7615; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 7616; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7617; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7618; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7619; NoVLX-NEXT: 
kmovw %k0, %eax 7620; NoVLX-NEXT: andl $3, %eax 7621; NoVLX-NEXT: vzeroupper 7622; NoVLX-NEXT: retq 7623entry: 7624 %0 = bitcast <2 x i64> %__a to <2 x i64> 7625 %load = load <2 x i64>, <2 x i64>* %__b 7626 %1 = bitcast <2 x i64> %load to <2 x i64> 7627 %2 = icmp sgt <2 x i64> %0, %1 7628 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7629 %4 = bitcast <4 x i1> %3 to i4 7630 ret i4 %4 7631} 7632 7633define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7634; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: 7635; VLX: # %bb.0: # %entry 7636; VLX-NEXT: kmovd %edi, %k1 7637; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 7638; VLX-NEXT: kmovb %k0, %eax 7639; VLX-NEXT: retq 7640; 7641; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: 7642; NoVLX: # %bb.0: # %entry 7643; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7644; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7645; NoVLX-NEXT: kmovw %edi, %k1 7646; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7647; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7648; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7649; NoVLX-NEXT: kmovw %k0, %eax 7650; NoVLX-NEXT: andl $3, %eax 7651; NoVLX-NEXT: vzeroupper 7652; NoVLX-NEXT: retq 7653entry: 7654 %0 = bitcast <2 x i64> %__a to <2 x i64> 7655 %1 = bitcast <2 x i64> %__b to <2 x i64> 7656 %2 = icmp sgt <2 x i64> %0, %1 7657 %3 = bitcast i8 %__u to <8 x i1> 7658 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7659 %4 = and <2 x i1> %2, %extract.i 7660 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7661 %6 = bitcast <4 x i1> %5 to i4 7662 ret i4 %6 7663} 7664 7665define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 7666; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: 7667; VLX: # %bb.0: # %entry 7668; VLX-NEXT: 
kmovd %edi, %k1 7669; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 7670; VLX-NEXT: kmovb %k0, %eax 7671; VLX-NEXT: retq 7672; 7673; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: 7674; NoVLX: # %bb.0: # %entry 7675; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7676; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 7677; NoVLX-NEXT: kmovw %edi, %k1 7678; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7679; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7680; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7681; NoVLX-NEXT: kmovw %k0, %eax 7682; NoVLX-NEXT: andl $3, %eax 7683; NoVLX-NEXT: vzeroupper 7684; NoVLX-NEXT: retq 7685entry: 7686 %0 = bitcast <2 x i64> %__a to <2 x i64> 7687 %load = load <2 x i64>, <2 x i64>* %__b 7688 %1 = bitcast <2 x i64> %load to <2 x i64> 7689 %2 = icmp sgt <2 x i64> %0, %1 7690 %3 = bitcast i8 %__u to <8 x i1> 7691 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7692 %4 = and <2 x i1> %2, %extract.i 7693 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7694 %6 = bitcast <4 x i1> %5 to i4 7695 ret i4 %6 7696} 7697 7698 7699define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 7700; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7701; VLX: # %bb.0: # %entry 7702; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 7703; VLX-NEXT: kmovb %k0, %eax 7704; VLX-NEXT: retq 7705; 7706; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7707; NoVLX: # %bb.0: # %entry 7708; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7709; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 7710; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7711; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7712; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7713; NoVLX-NEXT: kmovw %k0, %eax 7714; NoVLX-NEXT: andl $3, %eax 7715; NoVLX-NEXT: vzeroupper 7716; NoVLX-NEXT: retq 7717entry: 7718 %0 = bitcast <2 x i64> %__a to <2 x i64> 7719 %load = load i64, i64* %__b 7720 %vec = insertelement <2 x i64> undef, i64 %load, i32 
0 7721 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 7722 %2 = icmp sgt <2 x i64> %0, %1 7723 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7724 %4 = bitcast <4 x i1> %3 to i4 7725 ret i4 %4 7726} 7727 7728define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 7729; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7730; VLX: # %bb.0: # %entry 7731; VLX-NEXT: kmovd %edi, %k1 7732; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 7733; VLX-NEXT: kmovb %k0, %eax 7734; VLX-NEXT: retq 7735; 7736; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7737; NoVLX: # %bb.0: # %entry 7738; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7739; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 7740; NoVLX-NEXT: kmovw %edi, %k1 7741; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7742; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7743; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7744; NoVLX-NEXT: kmovw %k0, %eax 7745; NoVLX-NEXT: andl $3, %eax 7746; NoVLX-NEXT: vzeroupper 7747; NoVLX-NEXT: retq 7748entry: 7749 %0 = bitcast <2 x i64> %__a to <2 x i64> 7750 %load = load i64, i64* %__b 7751 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 7752 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 7753 %2 = icmp sgt <2 x i64> %0, %1 7754 %3 = bitcast i8 %__u to <8 x i1> 7755 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7756 %4 = and <2 x i1> %extract.i, %2 7757 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7758 %6 = bitcast <4 x i1> %5 to i4 7759 ret i4 %6 7760} 7761 7762 7763define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7764; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: 7765; VLX: # %bb.0: # %entry 7766; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 7767; VLX-NEXT: kmovd %k0, %eax 7768; 
VLX-NEXT: # kill: def $al killed $al killed $eax 7769; VLX-NEXT: retq 7770; 7771; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: 7772; NoVLX: # %bb.0: # %entry 7773; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7774; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7775; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7776; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7777; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7778; NoVLX-NEXT: kmovw %k0, %eax 7779; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7780; NoVLX-NEXT: vzeroupper 7781; NoVLX-NEXT: retq 7782entry: 7783 %0 = bitcast <2 x i64> %__a to <2 x i64> 7784 %1 = bitcast <2 x i64> %__b to <2 x i64> 7785 %2 = icmp sgt <2 x i64> %0, %1 7786 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7787 %4 = bitcast <8 x i1> %3 to i8 7788 ret i8 %4 7789} 7790 7791define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 7792; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: 7793; VLX: # %bb.0: # %entry 7794; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 7795; VLX-NEXT: kmovd %k0, %eax 7796; VLX-NEXT: # kill: def $al killed $al killed $eax 7797; VLX-NEXT: retq 7798; 7799; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: 7800; NoVLX: # %bb.0: # %entry 7801; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7802; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 7803; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7804; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7805; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7806; NoVLX-NEXT: kmovw %k0, %eax 7807; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7808; NoVLX-NEXT: vzeroupper 7809; NoVLX-NEXT: retq 7810entry: 7811 %0 = bitcast <2 x i64> %__a to <2 x i64> 7812 %load = load <2 x i64>, <2 x i64>* %__b 7813 %1 = bitcast <2 x i64> %load to <2 x i64> 7814 %2 = icmp sgt <2 x i64> %0, %1 7815 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7816 
%4 = bitcast <8 x i1> %3 to i8 7817 ret i8 %4 7818} 7819 7820define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7821; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: 7822; VLX: # %bb.0: # %entry 7823; VLX-NEXT: kmovd %edi, %k1 7824; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 7825; VLX-NEXT: kmovd %k0, %eax 7826; VLX-NEXT: # kill: def $al killed $al killed $eax 7827; VLX-NEXT: retq 7828; 7829; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: 7830; NoVLX: # %bb.0: # %entry 7831; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7832; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7833; NoVLX-NEXT: kmovw %edi, %k1 7834; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7835; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7836; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7837; NoVLX-NEXT: kmovw %k0, %eax 7838; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7839; NoVLX-NEXT: vzeroupper 7840; NoVLX-NEXT: retq 7841entry: 7842 %0 = bitcast <2 x i64> %__a to <2 x i64> 7843 %1 = bitcast <2 x i64> %__b to <2 x i64> 7844 %2 = icmp sgt <2 x i64> %0, %1 7845 %3 = bitcast i8 %__u to <8 x i1> 7846 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7847 %4 = and <2 x i1> %2, %extract.i 7848 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7849 %6 = bitcast <8 x i1> %5 to i8 7850 ret i8 %6 7851} 7852 7853define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 7854; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: 7855; VLX: # %bb.0: # %entry 7856; VLX-NEXT: kmovd %edi, %k1 7857; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 7858; VLX-NEXT: kmovd %k0, %eax 7859; VLX-NEXT: # kill: def $al killed $al killed $eax 7860; VLX-NEXT: retq 7861; 7862; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: 7863; NoVLX: # %bb.0: # %entry 7864; 
NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7865; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 7866; NoVLX-NEXT: kmovw %edi, %k1 7867; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7868; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7869; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7870; NoVLX-NEXT: kmovw %k0, %eax 7871; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7872; NoVLX-NEXT: vzeroupper 7873; NoVLX-NEXT: retq 7874entry: 7875 %0 = bitcast <2 x i64> %__a to <2 x i64> 7876 %load = load <2 x i64>, <2 x i64>* %__b 7877 %1 = bitcast <2 x i64> %load to <2 x i64> 7878 %2 = icmp sgt <2 x i64> %0, %1 7879 %3 = bitcast i8 %__u to <8 x i1> 7880 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7881 %4 = and <2 x i1> %2, %extract.i 7882 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7883 %6 = bitcast <8 x i1> %5 to i8 7884 ret i8 %6 7885} 7886 7887 7888define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 7889; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: 7890; VLX: # %bb.0: # %entry 7891; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 7892; VLX-NEXT: kmovd %k0, %eax 7893; VLX-NEXT: # kill: def $al killed $al killed $eax 7894; VLX-NEXT: retq 7895; 7896; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: 7897; NoVLX: # %bb.0: # %entry 7898; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7899; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 7900; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7901; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7902; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7903; NoVLX-NEXT: kmovw %k0, %eax 7904; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7905; NoVLX-NEXT: vzeroupper 7906; NoVLX-NEXT: retq 7907entry: 7908 %0 = bitcast <2 x i64> %__a to <2 x i64> 7909 %load = load i64, i64* %__b 7910 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 7911 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 7912 
%2 = icmp sgt <2 x i64> %0, %1 7913 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7914 %4 = bitcast <8 x i1> %3 to i8 7915 ret i8 %4 7916} 7917 7918define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 7919; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: 7920; VLX: # %bb.0: # %entry 7921; VLX-NEXT: kmovd %edi, %k1 7922; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 7923; VLX-NEXT: kmovd %k0, %eax 7924; VLX-NEXT: # kill: def $al killed $al killed $eax 7925; VLX-NEXT: retq 7926; 7927; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: 7928; NoVLX: # %bb.0: # %entry 7929; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7930; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 7931; NoVLX-NEXT: kmovw %edi, %k1 7932; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7933; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7934; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7935; NoVLX-NEXT: kmovw %k0, %eax 7936; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7937; NoVLX-NEXT: vzeroupper 7938; NoVLX-NEXT: retq 7939entry: 7940 %0 = bitcast <2 x i64> %__a to <2 x i64> 7941 %load = load i64, i64* %__b 7942 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 7943 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 7944 %2 = icmp sgt <2 x i64> %0, %1 7945 %3 = bitcast i8 %__u to <8 x i1> 7946 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7947 %4 = and <2 x i1> %extract.i, %2 7948 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7949 %6 = bitcast <8 x i1> %5 to i8 7950 ret i8 %6 7951} 7952 7953 7954define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7955; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: 7956; VLX: # %bb.0: # %entry 7957; VLX-NEXT: vpcmpgtq %xmm1, 
%xmm0, %k0 7958; VLX-NEXT: kmovd %k0, %eax 7959; VLX-NEXT: # kill: def $ax killed $ax killed $eax 7960; VLX-NEXT: retq 7961; 7962; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: 7963; NoVLX: # %bb.0: # %entry 7964; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7965; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7966; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7967; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7968; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7969; NoVLX-NEXT: kmovw %k0, %eax 7970; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 7971; NoVLX-NEXT: vzeroupper 7972; NoVLX-NEXT: retq 7973entry: 7974 %0 = bitcast <2 x i64> %__a to <2 x i64> 7975 %1 = bitcast <2 x i64> %__b to <2 x i64> 7976 %2 = icmp sgt <2 x i64> %0, %1 7977 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7978 %4 = bitcast <16 x i1> %3 to i16 7979 ret i16 %4 7980} 7981 7982define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 7983; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: 7984; VLX: # %bb.0: # %entry 7985; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 7986; VLX-NEXT: kmovd %k0, %eax 7987; VLX-NEXT: # kill: def $ax killed $ax killed $eax 7988; VLX-NEXT: retq 7989; 7990; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: 7991; NoVLX: # %bb.0: # %entry 7992; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7993; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 7994; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7995; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7996; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7997; NoVLX-NEXT: kmovw %k0, %eax 7998; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 7999; NoVLX-NEXT: vzeroupper 8000; NoVLX-NEXT: retq 8001entry: 8002 %0 = bitcast <2 x i64> %__a to <2 x i64> 8003 %load = load <2 x i64>, <2 x i64>* %__b 8004 %1 = bitcast <2 x i64> %load to <2 x i64> 8005 %2 = icmp sgt <2 x i64> %0, %1 8006 %3 = 
shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8007 %4 = bitcast <16 x i1> %3 to i16 8008 ret i16 %4 8009} 8010 8011define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 8012; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: 8013; VLX: # %bb.0: # %entry 8014; VLX-NEXT: kmovd %edi, %k1 8015; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 8016; VLX-NEXT: kmovd %k0, %eax 8017; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8018; VLX-NEXT: retq 8019; 8020; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: 8021; NoVLX: # %bb.0: # %entry 8022; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8023; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8024; NoVLX-NEXT: kmovw %edi, %k1 8025; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8026; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8027; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8028; NoVLX-NEXT: kmovw %k0, %eax 8029; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8030; NoVLX-NEXT: vzeroupper 8031; NoVLX-NEXT: retq 8032entry: 8033 %0 = bitcast <2 x i64> %__a to <2 x i64> 8034 %1 = bitcast <2 x i64> %__b to <2 x i64> 8035 %2 = icmp sgt <2 x i64> %0, %1 8036 %3 = bitcast i8 %__u to <8 x i1> 8037 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8038 %4 = and <2 x i1> %2, %extract.i 8039 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8040 %6 = bitcast <16 x i1> %5 to i16 8041 ret i16 %6 8042} 8043 8044define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 8045; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: 8046; VLX: # %bb.0: # %entry 8047; VLX-NEXT: kmovd %edi, %k1 8048; VLX-NEXT: 
vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 8049; VLX-NEXT: kmovd %k0, %eax 8050; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8051; VLX-NEXT: retq 8052; 8053; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: 8054; NoVLX: # %bb.0: # %entry 8055; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8056; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 8057; NoVLX-NEXT: kmovw %edi, %k1 8058; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8059; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8060; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8061; NoVLX-NEXT: kmovw %k0, %eax 8062; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8063; NoVLX-NEXT: vzeroupper 8064; NoVLX-NEXT: retq 8065entry: 8066 %0 = bitcast <2 x i64> %__a to <2 x i64> 8067 %load = load <2 x i64>, <2 x i64>* %__b 8068 %1 = bitcast <2 x i64> %load to <2 x i64> 8069 %2 = icmp sgt <2 x i64> %0, %1 8070 %3 = bitcast i8 %__u to <8 x i1> 8071 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8072 %4 = and <2 x i1> %2, %extract.i 8073 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8074 %6 = bitcast <16 x i1> %5 to i16 8075 ret i16 %6 8076} 8077 8078 8079define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 8080; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: 8081; VLX: # %bb.0: # %entry 8082; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 8083; VLX-NEXT: kmovd %k0, %eax 8084; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8085; VLX-NEXT: retq 8086; 8087; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: 8088; NoVLX: # %bb.0: # %entry 8089; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8090; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 8091; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8092; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8093; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8094; NoVLX-NEXT: kmovw %k0, %eax 8095; NoVLX-NEXT: # kill: def $ax 
killed $ax killed $eax 8096; NoVLX-NEXT: vzeroupper 8097; NoVLX-NEXT: retq 8098entry: 8099 %0 = bitcast <2 x i64> %__a to <2 x i64> 8100 %load = load i64, i64* %__b 8101 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8102 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8103 %2 = icmp sgt <2 x i64> %0, %1 8104 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8105 %4 = bitcast <16 x i1> %3 to i16 8106 ret i16 %4 8107} 8108 8109define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 8110; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: 8111; VLX: # %bb.0: # %entry 8112; VLX-NEXT: kmovd %edi, %k1 8113; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 8114; VLX-NEXT: kmovd %k0, %eax 8115; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8116; VLX-NEXT: retq 8117; 8118; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: 8119; NoVLX: # %bb.0: # %entry 8120; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8121; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 8122; NoVLX-NEXT: kmovw %edi, %k1 8123; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8124; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8125; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8126; NoVLX-NEXT: kmovw %k0, %eax 8127; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8128; NoVLX-NEXT: vzeroupper 8129; NoVLX-NEXT: retq 8130entry: 8131 %0 = bitcast <2 x i64> %__a to <2 x i64> 8132 %load = load i64, i64* %__b 8133 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8134 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8135 %2 = icmp sgt <2 x i64> %0, %1 8136 %3 = bitcast i8 %__u to <8 x i1> 8137 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8138 %4 = and <2 x i1> %extract.i, %2 8139 %5 = shufflevector <2 x i1> %4, <2 
x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8140 %6 = bitcast <16 x i1> %5 to i16 8141 ret i16 %6 8142} 8143 8144 8145define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 8146; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: 8147; VLX: # %bb.0: # %entry 8148; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 8149; VLX-NEXT: kmovd %k0, %eax 8150; VLX-NEXT: retq 8151; 8152; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: 8153; NoVLX: # %bb.0: # %entry 8154; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8155; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8156; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8157; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8158; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8159; NoVLX-NEXT: kmovw %k0, %eax 8160; NoVLX-NEXT: vzeroupper 8161; NoVLX-NEXT: retq 8162entry: 8163 %0 = bitcast <2 x i64> %__a to <2 x i64> 8164 %1 = bitcast <2 x i64> %__b to <2 x i64> 8165 %2 = icmp sgt <2 x i64> %0, %1 8166 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8167 %4 = bitcast <32 x i1> %3 to i32 8168 ret i32 %4 8169} 8170 8171define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 8172; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: 8173; VLX: # %bb.0: # %entry 8174; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 8175; VLX-NEXT: kmovd %k0, %eax 8176; VLX-NEXT: retq 8177; 8178; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: 8179; NoVLX: # %bb.0: # %entry 8180; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8181; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 8182; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8183; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8184; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 8185; NoVLX-NEXT: kmovw %k0, %eax 8186; NoVLX-NEXT: vzeroupper 8187; NoVLX-NEXT: retq 8188entry: 8189 %0 = bitcast <2 x i64> %__a to <2 x i64> 8190 %load = load <2 x i64>, <2 x i64>* %__b 8191 %1 = bitcast <2 x i64> %load to <2 x i64> 8192 %2 = icmp sgt <2 x i64> %0, %1 8193 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8194 %4 = bitcast <32 x i1> %3 to i32 8195 ret i32 %4 8196} 8197 8198define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 8199; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: 8200; VLX: # %bb.0: # %entry 8201; VLX-NEXT: kmovd %edi, %k1 8202; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 8203; VLX-NEXT: kmovd %k0, %eax 8204; VLX-NEXT: retq 8205; 8206; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: 8207; NoVLX: # %bb.0: # %entry 8208; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8209; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8210; NoVLX-NEXT: kmovw %edi, %k1 8211; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8212; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8213; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8214; NoVLX-NEXT: kmovw %k0, %eax 8215; NoVLX-NEXT: vzeroupper 8216; NoVLX-NEXT: retq 8217entry: 8218 %0 = bitcast <2 x i64> %__a to <2 x i64> 8219 %1 = bitcast <2 x i64> %__b to <2 x i64> 8220 %2 = icmp sgt <2 x i64> %0, %1 8221 %3 = bitcast i8 %__u to <8 x i1> 8222 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8223 %4 = and <2 x i1> %2, %extract.i 8224 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 
3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8225 %6 = bitcast <32 x i1> %5 to i32 8226 ret i32 %6 8227} 8228 8229define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 8230; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: 8231; VLX: # %bb.0: # %entry 8232; VLX-NEXT: kmovd %edi, %k1 8233; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 8234; VLX-NEXT: kmovd %k0, %eax 8235; VLX-NEXT: retq 8236; 8237; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: 8238; NoVLX: # %bb.0: # %entry 8239; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8240; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 8241; NoVLX-NEXT: kmovw %edi, %k1 8242; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8243; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8244; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8245; NoVLX-NEXT: kmovw %k0, %eax 8246; NoVLX-NEXT: vzeroupper 8247; NoVLX-NEXT: retq 8248entry: 8249 %0 = bitcast <2 x i64> %__a to <2 x i64> 8250 %load = load <2 x i64>, <2 x i64>* %__b 8251 %1 = bitcast <2 x i64> %load to <2 x i64> 8252 %2 = icmp sgt <2 x i64> %0, %1 8253 %3 = bitcast i8 %__u to <8 x i1> 8254 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8255 %4 = and <2 x i1> %2, %extract.i 8256 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8257 %6 = bitcast <32 x i1> %5 to i32 8258 ret i32 %6 8259} 8260 8261 8262define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 8263; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: 8264; VLX: # %bb.0: # %entry 8265; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 8266; VLX-NEXT: kmovd %k0, %eax 8267; VLX-NEXT: retq 8268; 8269; 
NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: 8270; NoVLX: # %bb.0: # %entry 8271; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8272; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 8273; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8274; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8275; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8276; NoVLX-NEXT: kmovw %k0, %eax 8277; NoVLX-NEXT: vzeroupper 8278; NoVLX-NEXT: retq 8279entry: 8280 %0 = bitcast <2 x i64> %__a to <2 x i64> 8281 %load = load i64, i64* %__b 8282 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8283 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8284 %2 = icmp sgt <2 x i64> %0, %1 8285 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8286 %4 = bitcast <32 x i1> %3 to i32 8287 ret i32 %4 8288} 8289 8290define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 8291; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: 8292; VLX: # %bb.0: # %entry 8293; VLX-NEXT: kmovd %edi, %k1 8294; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 8295; VLX-NEXT: kmovd %k0, %eax 8296; VLX-NEXT: retq 8297; 8298; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: 8299; NoVLX: # %bb.0: # %entry 8300; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8301; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 8302; NoVLX-NEXT: kmovw %edi, %k1 8303; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8304; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8305; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8306; NoVLX-NEXT: kmovw %k0, %eax 8307; NoVLX-NEXT: vzeroupper 8308; NoVLX-NEXT: retq 8309entry: 8310 %0 = bitcast <2 x i64> %__a to <2 x i64> 8311 %load = load i64, i64* %__b 8312 %vec = insertelement <2 x i64> undef, i64 %load, 
i32 0 8313 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8314 %2 = icmp sgt <2 x i64> %0, %1 8315 %3 = bitcast i8 %__u to <8 x i1> 8316 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8317 %4 = and <2 x i1> %extract.i, %2 8318 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8319 %6 = bitcast <32 x i1> %5 to i32 8320 ret i32 %6 8321} 8322 8323 8324define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 8325; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: 8326; VLX: # %bb.0: # %entry 8327; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 8328; VLX-NEXT: kmovq %k0, %rax 8329; VLX-NEXT: retq 8330; 8331; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: 8332; NoVLX: # %bb.0: # %entry 8333; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8334; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8335; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8336; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8337; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8338; NoVLX-NEXT: kmovw %k0, %eax 8339; NoVLX-NEXT: movzwl %ax, %eax 8340; NoVLX-NEXT: vzeroupper 8341; NoVLX-NEXT: retq 8342entry: 8343 %0 = bitcast <2 x i64> %__a to <2 x i64> 8344 %1 = bitcast <2 x i64> %__b to <2 x i64> 8345 %2 = icmp sgt <2 x i64> %0, %1 8346 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8347 %4 = bitcast <64 x i1> %3 to i64 8348 ret i64 %4 8349} 8350 8351define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 8352; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: 8353; VLX: # %bb.0: # %entry 8354; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 8355; VLX-NEXT: kmovq %k0, %rax 8356; VLX-NEXT: retq 8357; 8358; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: 8359; NoVLX: # %bb.0: # %entry 8360; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8361; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 8362; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8363; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8364; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8365; NoVLX-NEXT: kmovw %k0, %eax 8366; NoVLX-NEXT: movzwl %ax, %eax 8367; NoVLX-NEXT: vzeroupper 8368; NoVLX-NEXT: retq 8369entry: 8370 %0 = bitcast <2 x i64> %__a to <2 x i64> 8371 %load = load <2 x i64>, <2 x i64>* %__b 8372 %1 = bitcast <2 x i64> %load to <2 x i64> 8373 %2 = icmp sgt <2 x i64> %0, %1 8374 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8375 %4 = bitcast <64 x i1> %3 to i64 8376 ret i64 %4 8377} 8378 8379define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 8380; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: 8381; VLX: # %bb.0: # %entry 8382; VLX-NEXT: kmovd %edi, %k1 8383; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 8384; VLX-NEXT: kmovq %k0, %rax 8385; VLX-NEXT: retq 8386; 8387; 
NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: 8388; NoVLX: # %bb.0: # %entry 8389; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8390; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8391; NoVLX-NEXT: kmovw %edi, %k1 8392; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8393; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8394; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8395; NoVLX-NEXT: kmovw %k0, %eax 8396; NoVLX-NEXT: movzwl %ax, %eax 8397; NoVLX-NEXT: vzeroupper 8398; NoVLX-NEXT: retq 8399entry: 8400 %0 = bitcast <2 x i64> %__a to <2 x i64> 8401 %1 = bitcast <2 x i64> %__b to <2 x i64> 8402 %2 = icmp sgt <2 x i64> %0, %1 8403 %3 = bitcast i8 %__u to <8 x i1> 8404 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8405 %4 = and <2 x i1> %2, %extract.i 8406 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8407 %6 = bitcast <64 x i1> %5 to i64 8408 ret i64 %6 8409} 8410 8411define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 8412; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: 8413; VLX: # %bb.0: # %entry 8414; VLX-NEXT: kmovd %edi, %k1 8415; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 8416; VLX-NEXT: kmovq %k0, %rax 8417; VLX-NEXT: retq 8418; 8419; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: 8420; NoVLX: # %bb.0: # %entry 8421; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8422; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 8423; NoVLX-NEXT: kmovw %edi, %k1 8424; NoVLX-NEXT: vpcmpgtq 
%zmm1, %zmm0, %k0 {%k1} 8425; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8426; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8427; NoVLX-NEXT: kmovw %k0, %eax 8428; NoVLX-NEXT: movzwl %ax, %eax 8429; NoVLX-NEXT: vzeroupper 8430; NoVLX-NEXT: retq 8431entry: 8432 %0 = bitcast <2 x i64> %__a to <2 x i64> 8433 %load = load <2 x i64>, <2 x i64>* %__b 8434 %1 = bitcast <2 x i64> %load to <2 x i64> 8435 %2 = icmp sgt <2 x i64> %0, %1 8436 %3 = bitcast i8 %__u to <8 x i1> 8437 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8438 %4 = and <2 x i1> %2, %extract.i 8439 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8440 %6 = bitcast <64 x i1> %5 to i64 8441 ret i64 %6 8442} 8443 8444 8445define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 8446; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8447; VLX: # %bb.0: # %entry 8448; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 8449; VLX-NEXT: kmovq %k0, %rax 8450; VLX-NEXT: retq 8451; 8452; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8453; NoVLX: # %bb.0: # %entry 8454; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8455; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 8456; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8457; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8458; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8459; NoVLX-NEXT: kmovw %k0, %eax 8460; NoVLX-NEXT: movzwl %ax, %eax 8461; NoVLX-NEXT: vzeroupper 8462; NoVLX-NEXT: retq 8463entry: 8464 %0 = bitcast <2 x i64> %__a to <2 x i64> 8465 %load = load i64, i64* %__b 
8466 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8467 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8468 %2 = icmp sgt <2 x i64> %0, %1 8469 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8470 %4 = bitcast <64 x i1> %3 to i64 8471 ret i64 %4 8472} 8473 8474define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 8475; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8476; VLX: # %bb.0: # %entry 8477; VLX-NEXT: kmovd %edi, %k1 8478; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 8479; VLX-NEXT: kmovq %k0, %rax 8480; VLX-NEXT: retq 8481; 8482; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8483; NoVLX: # %bb.0: # %entry 8484; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8485; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 8486; NoVLX-NEXT: kmovw %edi, %k1 8487; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8488; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8489; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8490; NoVLX-NEXT: kmovw %k0, %eax 8491; NoVLX-NEXT: movzwl %ax, %eax 8492; NoVLX-NEXT: vzeroupper 8493; NoVLX-NEXT: retq 8494entry: 8495 %0 = bitcast <2 x i64> %__a to <2 x i64> 8496 %load = load i64, i64* %__b 8497 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8498 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8499 %2 = icmp sgt <2 x i64> %0, %1 8500 %3 = bitcast i8 %__u to <8 x i1> 8501 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x 
i32> <i32 0, i32 1> 8502 %4 = and <2 x i1> %extract.i, %2 8503 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8504 %6 = bitcast <64 x i1> %5 to i64 8505 ret i64 %6 8506} 8507 8508 8509define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8510; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: 8511; VLX: # %bb.0: # %entry 8512; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8513; VLX-NEXT: kmovd %k0, %eax 8514; VLX-NEXT: # kill: def $al killed $al killed $eax 8515; VLX-NEXT: vzeroupper 8516; VLX-NEXT: retq 8517; 8518; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: 8519; NoVLX: # %bb.0: # %entry 8520; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8521; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8522; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8523; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8524; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8525; NoVLX-NEXT: kmovw %k0, %eax 8526; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8527; NoVLX-NEXT: vzeroupper 8528; NoVLX-NEXT: retq 8529entry: 8530 %0 = bitcast <4 x i64> %__a to <4 x i64> 8531 %1 = bitcast <4 x i64> %__b to <4 x i64> 8532 %2 = icmp sgt <4 x i64> %0, %1 8533 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8534 %4 = bitcast <8 x i1> %3 to i8 8535 ret i8 %4 8536} 8537 8538define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8539; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: 8540; VLX: # 
%bb.0: # %entry 8541; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8542; VLX-NEXT: kmovd %k0, %eax 8543; VLX-NEXT: # kill: def $al killed $al killed $eax 8544; VLX-NEXT: vzeroupper 8545; VLX-NEXT: retq 8546; 8547; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: 8548; NoVLX: # %bb.0: # %entry 8549; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8550; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8551; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8552; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8553; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8554; NoVLX-NEXT: kmovw %k0, %eax 8555; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8556; NoVLX-NEXT: vzeroupper 8557; NoVLX-NEXT: retq 8558entry: 8559 %0 = bitcast <4 x i64> %__a to <4 x i64> 8560 %load = load <4 x i64>, <4 x i64>* %__b 8561 %1 = bitcast <4 x i64> %load to <4 x i64> 8562 %2 = icmp sgt <4 x i64> %0, %1 8563 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8564 %4 = bitcast <8 x i1> %3 to i8 8565 ret i8 %4 8566} 8567 8568define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8569; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: 8570; VLX: # %bb.0: # %entry 8571; VLX-NEXT: kmovd %edi, %k1 8572; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8573; VLX-NEXT: kmovd %k0, %eax 8574; VLX-NEXT: # kill: def $al killed $al killed $eax 8575; VLX-NEXT: vzeroupper 8576; VLX-NEXT: retq 8577; 8578; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: 8579; NoVLX: # %bb.0: # %entry 8580; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8581; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8582; NoVLX-NEXT: kmovw %edi, %k1 8583; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8584; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8585; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8586; NoVLX-NEXT: kmovw %k0, %eax 8587; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8588; NoVLX-NEXT: vzeroupper 8589; NoVLX-NEXT: retq 8590entry: 8591 
%0 = bitcast <4 x i64> %__a to <4 x i64> 8592 %1 = bitcast <4 x i64> %__b to <4 x i64> 8593 %2 = icmp sgt <4 x i64> %0, %1 8594 %3 = bitcast i8 %__u to <8 x i1> 8595 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8596 %4 = and <4 x i1> %2, %extract.i 8597 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8598 %6 = bitcast <8 x i1> %5 to i8 8599 ret i8 %6 8600} 8601 8602define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8603; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: 8604; VLX: # %bb.0: # %entry 8605; VLX-NEXT: kmovd %edi, %k1 8606; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8607; VLX-NEXT: kmovd %k0, %eax 8608; VLX-NEXT: # kill: def $al killed $al killed $eax 8609; VLX-NEXT: vzeroupper 8610; VLX-NEXT: retq 8611; 8612; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: 8613; NoVLX: # %bb.0: # %entry 8614; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8615; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 8616; NoVLX-NEXT: kmovw %edi, %k1 8617; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8618; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8619; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8620; NoVLX-NEXT: kmovw %k0, %eax 8621; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8622; NoVLX-NEXT: vzeroupper 8623; NoVLX-NEXT: retq 8624entry: 8625 %0 = bitcast <4 x i64> %__a to <4 x i64> 8626 %load = load <4 x i64>, <4 x i64>* %__b 8627 %1 = bitcast <4 x i64> %load to <4 x i64> 8628 %2 = icmp sgt <4 x i64> %0, %1 8629 %3 = bitcast i8 %__u to <8 x i1> 8630 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8631 %4 = and <4 x i1> %2, %extract.i 8632 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8633 %6 = bitcast <8 x i1> %5 to i8 8634 ret i8 %6 8635} 8636 8637 
8638define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 8639; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8640; VLX: # %bb.0: # %entry 8641; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 8642; VLX-NEXT: kmovd %k0, %eax 8643; VLX-NEXT: # kill: def $al killed $al killed $eax 8644; VLX-NEXT: vzeroupper 8645; VLX-NEXT: retq 8646; 8647; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8648; NoVLX: # %bb.0: # %entry 8649; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8650; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 8651; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8652; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8653; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8654; NoVLX-NEXT: kmovw %k0, %eax 8655; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8656; NoVLX-NEXT: vzeroupper 8657; NoVLX-NEXT: retq 8658entry: 8659 %0 = bitcast <4 x i64> %__a to <4 x i64> 8660 %load = load i64, i64* %__b 8661 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8662 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8663 %2 = icmp sgt <4 x i64> %0, %1 8664 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8665 %4 = bitcast <8 x i1> %3 to i8 8666 ret i8 %4 8667} 8668 8669define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 8670; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8671; VLX: # %bb.0: # %entry 8672; VLX-NEXT: kmovd %edi, %k1 8673; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 8674; VLX-NEXT: kmovd %k0, %eax 8675; VLX-NEXT: # kill: def $al killed $al killed $eax 8676; VLX-NEXT: vzeroupper 8677; VLX-NEXT: retq 8678; 8679; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8680; NoVLX: # %bb.0: # %entry 8681; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8682; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 8683; NoVLX-NEXT: kmovw %edi, %k1 
8684; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8685; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8686; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8687; NoVLX-NEXT: kmovw %k0, %eax 8688; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8689; NoVLX-NEXT: vzeroupper 8690; NoVLX-NEXT: retq 8691entry: 8692 %0 = bitcast <4 x i64> %__a to <4 x i64> 8693 %load = load i64, i64* %__b 8694 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8695 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8696 %2 = icmp sgt <4 x i64> %0, %1 8697 %3 = bitcast i8 %__u to <8 x i1> 8698 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8699 %4 = and <4 x i1> %extract.i, %2 8700 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8701 %6 = bitcast <8 x i1> %5 to i8 8702 ret i8 %6 8703} 8704 8705 8706define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8707; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: 8708; VLX: # %bb.0: # %entry 8709; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8710; VLX-NEXT: kmovd %k0, %eax 8711; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8712; VLX-NEXT: vzeroupper 8713; VLX-NEXT: retq 8714; 8715; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: 8716; NoVLX: # %bb.0: # %entry 8717; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8718; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8719; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8720; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8721; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8722; NoVLX-NEXT: kmovw %k0, %eax 8723; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8724; NoVLX-NEXT: vzeroupper 8725; NoVLX-NEXT: retq 8726entry: 8727 %0 = bitcast <4 x i64> %__a to <4 x i64> 8728 %1 = bitcast <4 x i64> %__b to <4 x i64> 8729 %2 = icmp sgt <4 x i64> %0, %1 8730 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, 
i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8731 %4 = bitcast <16 x i1> %3 to i16 8732 ret i16 %4 8733} 8734 8735define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8736; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: 8737; VLX: # %bb.0: # %entry 8738; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8739; VLX-NEXT: kmovd %k0, %eax 8740; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8741; VLX-NEXT: vzeroupper 8742; VLX-NEXT: retq 8743; 8744; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: 8745; NoVLX: # %bb.0: # %entry 8746; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8747; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8748; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8749; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8750; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8751; NoVLX-NEXT: kmovw %k0, %eax 8752; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8753; NoVLX-NEXT: vzeroupper 8754; NoVLX-NEXT: retq 8755entry: 8756 %0 = bitcast <4 x i64> %__a to <4 x i64> 8757 %load = load <4 x i64>, <4 x i64>* %__b 8758 %1 = bitcast <4 x i64> %load to <4 x i64> 8759 %2 = icmp sgt <4 x i64> %0, %1 8760 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8761 %4 = bitcast <16 x i1> %3 to i16 8762 ret i16 %4 8763} 8764 8765define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8766; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: 8767; VLX: # %bb.0: # %entry 8768; VLX-NEXT: kmovd %edi, %k1 8769; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8770; VLX-NEXT: kmovd %k0, %eax 8771; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8772; VLX-NEXT: vzeroupper 8773; VLX-NEXT: retq 8774; 8775; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: 8776; NoVLX: # %bb.0: # %entry 8777; NoVLX-NEXT: # kill: def $ymm1 
killed $ymm1 def $zmm1 8778; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8779; NoVLX-NEXT: kmovw %edi, %k1 8780; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8781; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8782; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8783; NoVLX-NEXT: kmovw %k0, %eax 8784; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8785; NoVLX-NEXT: vzeroupper 8786; NoVLX-NEXT: retq 8787entry: 8788 %0 = bitcast <4 x i64> %__a to <4 x i64> 8789 %1 = bitcast <4 x i64> %__b to <4 x i64> 8790 %2 = icmp sgt <4 x i64> %0, %1 8791 %3 = bitcast i8 %__u to <8 x i1> 8792 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8793 %4 = and <4 x i1> %2, %extract.i 8794 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8795 %6 = bitcast <16 x i1> %5 to i16 8796 ret i16 %6 8797} 8798 8799define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8800; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: 8801; VLX: # %bb.0: # %entry 8802; VLX-NEXT: kmovd %edi, %k1 8803; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8804; VLX-NEXT: kmovd %k0, %eax 8805; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8806; VLX-NEXT: vzeroupper 8807; VLX-NEXT: retq 8808; 8809; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: 8810; NoVLX: # %bb.0: # %entry 8811; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8812; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 8813; NoVLX-NEXT: kmovw %edi, %k1 8814; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8815; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8816; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8817; NoVLX-NEXT: kmovw %k0, %eax 8818; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8819; NoVLX-NEXT: vzeroupper 8820; NoVLX-NEXT: retq 8821entry: 8822 %0 = bitcast <4 x i64> %__a to <4 x i64> 8823 %load = load <4 x i64>, <4 
x i64>* %__b 8824 %1 = bitcast <4 x i64> %load to <4 x i64> 8825 %2 = icmp sgt <4 x i64> %0, %1 8826 %3 = bitcast i8 %__u to <8 x i1> 8827 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8828 %4 = and <4 x i1> %2, %extract.i 8829 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8830 %6 = bitcast <16 x i1> %5 to i16 8831 ret i16 %6 8832} 8833 8834 8835define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 8836; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8837; VLX: # %bb.0: # %entry 8838; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 8839; VLX-NEXT: kmovd %k0, %eax 8840; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8841; VLX-NEXT: vzeroupper 8842; VLX-NEXT: retq 8843; 8844; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8845; NoVLX: # %bb.0: # %entry 8846; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8847; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 8848; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8849; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8850; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8851; NoVLX-NEXT: kmovw %k0, %eax 8852; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8853; NoVLX-NEXT: vzeroupper 8854; NoVLX-NEXT: retq 8855entry: 8856 %0 = bitcast <4 x i64> %__a to <4 x i64> 8857 %load = load i64, i64* %__b 8858 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8859 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8860 %2 = icmp sgt <4 x i64> %0, %1 8861 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8862 %4 = bitcast <16 x i1> %3 to i16 8863 ret i16 %4 8864} 8865 8866define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x 
i64> %__a, i64* %__b) local_unnamed_addr { 8867; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8868; VLX: # %bb.0: # %entry 8869; VLX-NEXT: kmovd %edi, %k1 8870; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 8871; VLX-NEXT: kmovd %k0, %eax 8872; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8873; VLX-NEXT: vzeroupper 8874; VLX-NEXT: retq 8875; 8876; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8877; NoVLX: # %bb.0: # %entry 8878; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8879; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 8880; NoVLX-NEXT: kmovw %edi, %k1 8881; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8882; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8883; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8884; NoVLX-NEXT: kmovw %k0, %eax 8885; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8886; NoVLX-NEXT: vzeroupper 8887; NoVLX-NEXT: retq 8888entry: 8889 %0 = bitcast <4 x i64> %__a to <4 x i64> 8890 %load = load i64, i64* %__b 8891 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8892 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8893 %2 = icmp sgt <4 x i64> %0, %1 8894 %3 = bitcast i8 %__u to <8 x i1> 8895 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8896 %4 = and <4 x i1> %extract.i, %2 8897 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8898 %6 = bitcast <16 x i1> %5 to i16 8899 ret i16 %6 8900} 8901 8902 8903define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8904; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: 8905; VLX: # %bb.0: # %entry 8906; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8907; VLX-NEXT: kmovd %k0, %eax 8908; VLX-NEXT: vzeroupper 8909; VLX-NEXT: retq 8910; 8911; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: 8912; NoVLX: # %bb.0: # %entry 8913; 
NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8914; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8915; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8916; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8917; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8918; NoVLX-NEXT: kmovw %k0, %eax 8919; NoVLX-NEXT: vzeroupper 8920; NoVLX-NEXT: retq 8921entry: 8922 %0 = bitcast <4 x i64> %__a to <4 x i64> 8923 %1 = bitcast <4 x i64> %__b to <4 x i64> 8924 %2 = icmp sgt <4 x i64> %0, %1 8925 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8926 %4 = bitcast <32 x i1> %3 to i32 8927 ret i32 %4 8928} 8929 8930define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8931; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: 8932; VLX: # %bb.0: # %entry 8933; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8934; VLX-NEXT: kmovd %k0, %eax 8935; VLX-NEXT: vzeroupper 8936; VLX-NEXT: retq 8937; 8938; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: 8939; NoVLX: # %bb.0: # %entry 8940; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8941; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8942; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8943; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8944; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8945; NoVLX-NEXT: kmovw %k0, %eax 8946; NoVLX-NEXT: vzeroupper 8947; NoVLX-NEXT: retq 8948entry: 8949 %0 = bitcast <4 x i64> %__a to <4 x i64> 8950 %load = load <4 x i64>, <4 x i64>* %__b 8951 %1 = bitcast <4 x i64> %load to <4 x i64> 8952 %2 = icmp sgt <4 x i64> %0, %1 8953 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7> 8954 %4 = bitcast <32 x i1> %3 to i32 8955 ret i32 %4 8956} 8957 8958define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8959; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: 8960; VLX: # %bb.0: # %entry 8961; VLX-NEXT: kmovd %edi, %k1 8962; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8963; VLX-NEXT: kmovd %k0, %eax 8964; VLX-NEXT: vzeroupper 8965; VLX-NEXT: retq 8966; 8967; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: 8968; NoVLX: # %bb.0: # %entry 8969; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8970; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8971; NoVLX-NEXT: kmovw %edi, %k1 8972; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8973; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8974; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8975; NoVLX-NEXT: kmovw %k0, %eax 8976; NoVLX-NEXT: vzeroupper 8977; NoVLX-NEXT: retq 8978entry: 8979 %0 = bitcast <4 x i64> %__a to <4 x i64> 8980 %1 = bitcast <4 x i64> %__b to <4 x i64> 8981 %2 = icmp sgt <4 x i64> %0, %1 8982 %3 = bitcast i8 %__u to <8 x i1> 8983 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8984 %4 = and <4 x i1> %2, %extract.i 8985 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8986 %6 = bitcast <32 x i1> %5 to i32 8987 ret i32 %6 8988} 8989 8990define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 8991; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: 8992; VLX: # %bb.0: # %entry 8993; VLX-NEXT: kmovd %edi, %k1 8994; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8995; VLX-NEXT: kmovd %k0, %eax 8996; VLX-NEXT: vzeroupper 8997; 
VLX-NEXT: retq 8998; 8999; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: 9000; NoVLX: # %bb.0: # %entry 9001; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9002; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 9003; NoVLX-NEXT: kmovw %edi, %k1 9004; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9005; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9006; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9007; NoVLX-NEXT: kmovw %k0, %eax 9008; NoVLX-NEXT: vzeroupper 9009; NoVLX-NEXT: retq 9010entry: 9011 %0 = bitcast <4 x i64> %__a to <4 x i64> 9012 %load = load <4 x i64>, <4 x i64>* %__b 9013 %1 = bitcast <4 x i64> %load to <4 x i64> 9014 %2 = icmp sgt <4 x i64> %0, %1 9015 %3 = bitcast i8 %__u to <8 x i1> 9016 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9017 %4 = and <4 x i1> %2, %extract.i 9018 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9019 %6 = bitcast <32 x i1> %5 to i32 9020 ret i32 %6 9021} 9022 9023 9024define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 9025; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 9026; VLX: # %bb.0: # %entry 9027; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 9028; VLX-NEXT: kmovd %k0, %eax 9029; VLX-NEXT: vzeroupper 9030; VLX-NEXT: retq 9031; 9032; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 9033; NoVLX: # %bb.0: # %entry 9034; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9035; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 9036; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9037; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9038; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9039; NoVLX-NEXT: kmovw %k0, %eax 9040; NoVLX-NEXT: vzeroupper 9041; NoVLX-NEXT: retq 9042entry: 9043 %0 = bitcast <4 x i64> %__a to <4 x i64> 9044 %load = 
load i64, i64* %__b 9045 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9046 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9047 %2 = icmp sgt <4 x i64> %0, %1 9048 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9049 %4 = bitcast <32 x i1> %3 to i32 9050 ret i32 %4 9051} 9052 9053define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 9054; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 9055; VLX: # %bb.0: # %entry 9056; VLX-NEXT: kmovd %edi, %k1 9057; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 9058; VLX-NEXT: kmovd %k0, %eax 9059; VLX-NEXT: vzeroupper 9060; VLX-NEXT: retq 9061; 9062; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 9063; NoVLX: # %bb.0: # %entry 9064; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9065; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 9066; NoVLX-NEXT: kmovw %edi, %k1 9067; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9068; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9069; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9070; NoVLX-NEXT: kmovw %k0, %eax 9071; NoVLX-NEXT: vzeroupper 9072; NoVLX-NEXT: retq 9073entry: 9074 %0 = bitcast <4 x i64> %__a to <4 x i64> 9075 %load = load i64, i64* %__b 9076 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9077 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9078 %2 = icmp sgt <4 x i64> %0, %1 9079 %3 = bitcast i8 %__u to <8 x i1> 9080 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9081 %4 = and <4 x i1> %extract.i, %2 9082 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9083 %6 = bitcast <32 x i1> %5 to i32 9084 ret i32 %6 9085} 9086 9087 9088define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 9089; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: 9090; VLX: # %bb.0: # %entry 9091; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 9092; VLX-NEXT: kmovq %k0, %rax 9093; VLX-NEXT: vzeroupper 9094; VLX-NEXT: retq 9095; 9096; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: 9097; NoVLX: # %bb.0: # %entry 9098; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 9099; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9100; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9101; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9102; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9103; NoVLX-NEXT: kmovw %k0, %eax 9104; NoVLX-NEXT: movzwl %ax, %eax 9105; NoVLX-NEXT: vzeroupper 9106; NoVLX-NEXT: retq 9107entry: 9108 %0 = bitcast <4 x i64> %__a to <4 x i64> 9109 %1 = bitcast <4 x i64> %__b to <4 x i64> 9110 %2 = icmp sgt <4 x i64> %0, %1 9111 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9112 %4 = bitcast <64 x i1> %3 to i64 9113 ret i64 %4 9114} 9115 9116define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 9117; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: 9118; VLX: # %bb.0: # %entry 9119; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 9120; VLX-NEXT: 
kmovq %k0, %rax 9121; VLX-NEXT: vzeroupper 9122; VLX-NEXT: retq 9123; 9124; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: 9125; NoVLX: # %bb.0: # %entry 9126; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9127; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 9128; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9129; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9130; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9131; NoVLX-NEXT: kmovw %k0, %eax 9132; NoVLX-NEXT: movzwl %ax, %eax 9133; NoVLX-NEXT: vzeroupper 9134; NoVLX-NEXT: retq 9135entry: 9136 %0 = bitcast <4 x i64> %__a to <4 x i64> 9137 %load = load <4 x i64>, <4 x i64>* %__b 9138 %1 = bitcast <4 x i64> %load to <4 x i64> 9139 %2 = icmp sgt <4 x i64> %0, %1 9140 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9141 %4 = bitcast <64 x i1> %3 to i64 9142 ret i64 %4 9143} 9144 9145define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 9146; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: 9147; VLX: # %bb.0: # %entry 9148; VLX-NEXT: kmovd %edi, %k1 9149; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 9150; VLX-NEXT: kmovq %k0, %rax 9151; VLX-NEXT: vzeroupper 9152; VLX-NEXT: retq 9153; 9154; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: 9155; NoVLX: # %bb.0: # %entry 9156; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 9157; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9158; NoVLX-NEXT: kmovw %edi, %k1 9159; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9160; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9161; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 9162; NoVLX-NEXT: kmovw %k0, %eax 9163; NoVLX-NEXT: movzwl %ax, %eax 9164; NoVLX-NEXT: vzeroupper 9165; NoVLX-NEXT: retq 9166entry: 9167 %0 = bitcast <4 x i64> %__a to <4 x i64> 9168 %1 = bitcast <4 x i64> %__b to <4 x i64> 9169 %2 = icmp sgt <4 x i64> %0, %1 9170 %3 = bitcast i8 %__u to <8 x i1> 9171 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9172 %4 = and <4 x i1> %2, %extract.i 9173 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9174 %6 = bitcast <64 x i1> %5 to i64 9175 ret i64 %6 9176} 9177 9178define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 9179; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: 9180; VLX: # %bb.0: # %entry 9181; VLX-NEXT: kmovd %edi, %k1 9182; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 9183; VLX-NEXT: kmovq %k0, %rax 9184; VLX-NEXT: vzeroupper 9185; VLX-NEXT: retq 9186; 9187; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: 9188; NoVLX: # %bb.0: # %entry 9189; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9190; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 9191; NoVLX-NEXT: kmovw %edi, %k1 9192; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9193; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9194; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9195; NoVLX-NEXT: kmovw %k0, %eax 9196; NoVLX-NEXT: movzwl %ax, %eax 9197; NoVLX-NEXT: vzeroupper 9198; NoVLX-NEXT: retq 9199entry: 9200 %0 = bitcast <4 x i64> %__a to <4 x i64> 9201 
%load = load <4 x i64>, <4 x i64>* %__b 9202 %1 = bitcast <4 x i64> %load to <4 x i64> 9203 %2 = icmp sgt <4 x i64> %0, %1 9204 %3 = bitcast i8 %__u to <8 x i1> 9205 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9206 %4 = and <4 x i1> %2, %extract.i 9207 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9208 %6 = bitcast <64 x i1> %5 to i64 9209 ret i64 %6 9210} 9211 9212 9213define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 9214; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9215; VLX: # %bb.0: # %entry 9216; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 9217; VLX-NEXT: kmovq %k0, %rax 9218; VLX-NEXT: vzeroupper 9219; VLX-NEXT: retq 9220; 9221; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9222; NoVLX: # %bb.0: # %entry 9223; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9224; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 9225; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9226; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9227; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9228; NoVLX-NEXT: kmovw %k0, %eax 9229; NoVLX-NEXT: movzwl %ax, %eax 9230; NoVLX-NEXT: vzeroupper 9231; NoVLX-NEXT: retq 9232entry: 9233 %0 = bitcast <4 x i64> %__a to <4 x i64> 9234 %load = load i64, i64* %__b 9235 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9236 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9237 %2 = icmp sgt <4 x i64> %0, %1 9238 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9239 %4 = bitcast <64 x i1> %3 to i64 9240 ret i64 %4 9241} 9242 9243define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 9244; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9245; VLX: # %bb.0: # %entry 9246; VLX-NEXT: kmovd %edi, %k1 9247; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 9248; VLX-NEXT: kmovq %k0, %rax 9249; VLX-NEXT: vzeroupper 9250; VLX-NEXT: retq 9251; 9252; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9253; NoVLX: # %bb.0: # %entry 9254; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9255; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 9256; NoVLX-NEXT: kmovw %edi, %k1 9257; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9258; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9259; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9260; NoVLX-NEXT: kmovw %k0, %eax 9261; NoVLX-NEXT: movzwl %ax, %eax 9262; NoVLX-NEXT: vzeroupper 9263; NoVLX-NEXT: retq 9264entry: 9265 %0 = bitcast <4 x i64> %__a to <4 x i64> 9266 %load = load i64, i64* %__b 9267 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9268 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9269 %2 = icmp sgt <4 x i64> %0, %1 9270 %3 = bitcast i8 %__u to <8 x i1> 9271 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9272 %4 = and <4 x i1> %extract.i, %2 9273 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9274 %6 = bitcast <64 x i1> %5 to i64 9275 ret i64 %6 9276} 9277 9278 9279define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9280; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: 9281; VLX: # %bb.0: # %entry 9282; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9283; VLX-NEXT: kmovd %k0, %eax 9284; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9285; VLX-NEXT: vzeroupper 9286; VLX-NEXT: retq 9287; 9288; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: 9289; NoVLX: # %bb.0: # %entry 9290; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9291; NoVLX-NEXT: kmovw %k0, %eax 9292; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9293; NoVLX-NEXT: vzeroupper 9294; NoVLX-NEXT: retq 9295entry: 9296 %0 = bitcast <8 x i64> %__a to <8 x i64> 9297 %1 = bitcast <8 x i64> %__b to <8 x i64> 9298 %2 = icmp sgt <8 x i64> %0, %1 9299 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9300 %4 = bitcast <16 x i1> %3 to i16 9301 ret i16 %4 9302} 9303 9304define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9305; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: 9306; VLX: # %bb.0: # %entry 9307; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9308; VLX-NEXT: kmovd %k0, %eax 9309; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9310; VLX-NEXT: vzeroupper 9311; VLX-NEXT: retq 9312; 9313; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: 9314; NoVLX: # %bb.0: # %entry 9315; NoVLX-NEXT: 
vpcmpgtq (%rdi), %zmm0, %k0 9316; NoVLX-NEXT: kmovw %k0, %eax 9317; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9318; NoVLX-NEXT: vzeroupper 9319; NoVLX-NEXT: retq 9320entry: 9321 %0 = bitcast <8 x i64> %__a to <8 x i64> 9322 %load = load <8 x i64>, <8 x i64>* %__b 9323 %1 = bitcast <8 x i64> %load to <8 x i64> 9324 %2 = icmp sgt <8 x i64> %0, %1 9325 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9326 %4 = bitcast <16 x i1> %3 to i16 9327 ret i16 %4 9328} 9329 9330define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9331; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: 9332; VLX: # %bb.0: # %entry 9333; VLX-NEXT: kmovd %edi, %k1 9334; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9335; VLX-NEXT: kmovd %k0, %eax 9336; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9337; VLX-NEXT: vzeroupper 9338; VLX-NEXT: retq 9339; 9340; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: 9341; NoVLX: # %bb.0: # %entry 9342; NoVLX-NEXT: kmovw %edi, %k1 9343; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9344; NoVLX-NEXT: kmovw %k0, %eax 9345; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9346; NoVLX-NEXT: vzeroupper 9347; NoVLX-NEXT: retq 9348entry: 9349 %0 = bitcast <8 x i64> %__a to <8 x i64> 9350 %1 = bitcast <8 x i64> %__b to <8 x i64> 9351 %2 = icmp sgt <8 x i64> %0, %1 9352 %3 = bitcast i8 %__u to <8 x i1> 9353 %4 = and <8 x i1> %2, %3 9354 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9355 %6 = bitcast <16 x i1> %5 to i16 9356 ret i16 %6 9357} 9358 9359define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9360; VLX-LABEL: 
test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: 9361; VLX: # %bb.0: # %entry 9362; VLX-NEXT: kmovd %edi, %k1 9363; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9364; VLX-NEXT: kmovd %k0, %eax 9365; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9366; VLX-NEXT: vzeroupper 9367; VLX-NEXT: retq 9368; 9369; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: 9370; NoVLX: # %bb.0: # %entry 9371; NoVLX-NEXT: kmovw %edi, %k1 9372; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9373; NoVLX-NEXT: kmovw %k0, %eax 9374; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9375; NoVLX-NEXT: vzeroupper 9376; NoVLX-NEXT: retq 9377entry: 9378 %0 = bitcast <8 x i64> %__a to <8 x i64> 9379 %load = load <8 x i64>, <8 x i64>* %__b 9380 %1 = bitcast <8 x i64> %load to <8 x i64> 9381 %2 = icmp sgt <8 x i64> %0, %1 9382 %3 = bitcast i8 %__u to <8 x i1> 9383 %4 = and <8 x i1> %2, %3 9384 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9385 %6 = bitcast <16 x i1> %5 to i16 9386 ret i16 %6 9387} 9388 9389 9390define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 9391; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9392; VLX: # %bb.0: # %entry 9393; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9394; VLX-NEXT: kmovd %k0, %eax 9395; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9396; VLX-NEXT: vzeroupper 9397; VLX-NEXT: retq 9398; 9399; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9400; NoVLX: # %bb.0: # %entry 9401; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9402; NoVLX-NEXT: kmovw %k0, %eax 9403; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9404; NoVLX-NEXT: vzeroupper 9405; NoVLX-NEXT: retq 9406entry: 9407 %0 = bitcast <8 x i64> %__a to <8 x i64> 9408 %load = load i64, i64* %__b 9409 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9410 %1 = shufflevector <8 x i64> %vec, <8 x i64> 
undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9411 %2 = icmp sgt <8 x i64> %0, %1 9412 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9413 %4 = bitcast <16 x i1> %3 to i16 9414 ret i16 %4 9415} 9416 9417define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 9418; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9419; VLX: # %bb.0: # %entry 9420; VLX-NEXT: kmovd %edi, %k1 9421; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9422; VLX-NEXT: kmovd %k0, %eax 9423; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9424; VLX-NEXT: vzeroupper 9425; VLX-NEXT: retq 9426; 9427; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9428; NoVLX: # %bb.0: # %entry 9429; NoVLX-NEXT: kmovw %edi, %k1 9430; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9431; NoVLX-NEXT: kmovw %k0, %eax 9432; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9433; NoVLX-NEXT: vzeroupper 9434; NoVLX-NEXT: retq 9435entry: 9436 %0 = bitcast <8 x i64> %__a to <8 x i64> 9437 %load = load i64, i64* %__b 9438 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9439 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9440 %2 = icmp sgt <8 x i64> %0, %1 9441 %3 = bitcast i8 %__u to <8 x i1> 9442 %4 = and <8 x i1> %3, %2 9443 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9444 %6 = bitcast <16 x i1> %5 to i16 9445 ret i16 %6 9446} 9447 9448 9449define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9450; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: 9451; VLX: # %bb.0: # %entry 9452; VLX-NEXT: 
vpcmpgtq %zmm1, %zmm0, %k0 9453; VLX-NEXT: kmovd %k0, %eax 9454; VLX-NEXT: vzeroupper 9455; VLX-NEXT: retq 9456; 9457; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: 9458; NoVLX: # %bb.0: # %entry 9459; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9460; NoVLX-NEXT: kmovw %k0, %eax 9461; NoVLX-NEXT: vzeroupper 9462; NoVLX-NEXT: retq 9463entry: 9464 %0 = bitcast <8 x i64> %__a to <8 x i64> 9465 %1 = bitcast <8 x i64> %__b to <8 x i64> 9466 %2 = icmp sgt <8 x i64> %0, %1 9467 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9468 %4 = bitcast <32 x i1> %3 to i32 9469 ret i32 %4 9470} 9471 9472define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9473; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: 9474; VLX: # %bb.0: # %entry 9475; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9476; VLX-NEXT: kmovd %k0, %eax 9477; VLX-NEXT: vzeroupper 9478; VLX-NEXT: retq 9479; 9480; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: 9481; NoVLX: # %bb.0: # %entry 9482; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9483; NoVLX-NEXT: kmovw %k0, %eax 9484; NoVLX-NEXT: vzeroupper 9485; NoVLX-NEXT: retq 9486entry: 9487 %0 = bitcast <8 x i64> %__a to <8 x i64> 9488 %load = load <8 x i64>, <8 x i64>* %__b 9489 %1 = bitcast <8 x i64> %load to <8 x i64> 9490 %2 = icmp sgt <8 x i64> %0, %1 9491 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9492 %4 = bitcast <32 x i1> %3 to i32 9493 ret i32 %4 9494} 9495 9496define zeroext i32 
@test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9497; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: 9498; VLX: # %bb.0: # %entry 9499; VLX-NEXT: kmovd %edi, %k1 9500; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9501; VLX-NEXT: kmovd %k0, %eax 9502; VLX-NEXT: vzeroupper 9503; VLX-NEXT: retq 9504; 9505; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: 9506; NoVLX: # %bb.0: # %entry 9507; NoVLX-NEXT: kmovw %edi, %k1 9508; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9509; NoVLX-NEXT: kmovw %k0, %eax 9510; NoVLX-NEXT: vzeroupper 9511; NoVLX-NEXT: retq 9512entry: 9513 %0 = bitcast <8 x i64> %__a to <8 x i64> 9514 %1 = bitcast <8 x i64> %__b to <8 x i64> 9515 %2 = icmp sgt <8 x i64> %0, %1 9516 %3 = bitcast i8 %__u to <8 x i1> 9517 %4 = and <8 x i1> %2, %3 9518 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9519 %6 = bitcast <32 x i1> %5 to i32 9520 ret i32 %6 9521} 9522 9523define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9524; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: 9525; VLX: # %bb.0: # %entry 9526; VLX-NEXT: kmovd %edi, %k1 9527; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9528; VLX-NEXT: kmovd %k0, %eax 9529; VLX-NEXT: vzeroupper 9530; VLX-NEXT: retq 9531; 9532; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: 9533; NoVLX: # %bb.0: # %entry 9534; NoVLX-NEXT: kmovw %edi, %k1 9535; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9536; NoVLX-NEXT: kmovw %k0, %eax 9537; NoVLX-NEXT: vzeroupper 9538; NoVLX-NEXT: retq 9539entry: 9540 %0 = bitcast <8 x i64> %__a to <8 x i64> 9541 %load = load <8 x i64>, <8 x i64>* %__b 9542 %1 = bitcast <8 x i64> 
%load to <8 x i64> 9543 %2 = icmp sgt <8 x i64> %0, %1 9544 %3 = bitcast i8 %__u to <8 x i1> 9545 %4 = and <8 x i1> %2, %3 9546 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9547 %6 = bitcast <32 x i1> %5 to i32 9548 ret i32 %6 9549} 9550 9551 9552define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 9553; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9554; VLX: # %bb.0: # %entry 9555; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9556; VLX-NEXT: kmovd %k0, %eax 9557; VLX-NEXT: vzeroupper 9558; VLX-NEXT: retq 9559; 9560; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9561; NoVLX: # %bb.0: # %entry 9562; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9563; NoVLX-NEXT: kmovw %k0, %eax 9564; NoVLX-NEXT: vzeroupper 9565; NoVLX-NEXT: retq 9566entry: 9567 %0 = bitcast <8 x i64> %__a to <8 x i64> 9568 %load = load i64, i64* %__b 9569 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9570 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9571 %2 = icmp sgt <8 x i64> %0, %1 9572 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9573 %4 = bitcast <32 x i1> %3 to i32 9574 ret i32 %4 9575} 9576 9577define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 9578; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9579; VLX: # %bb.0: # %entry 9580; VLX-NEXT: 
kmovd %edi, %k1 9581; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9582; VLX-NEXT: kmovd %k0, %eax 9583; VLX-NEXT: vzeroupper 9584; VLX-NEXT: retq 9585; 9586; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9587; NoVLX: # %bb.0: # %entry 9588; NoVLX-NEXT: kmovw %edi, %k1 9589; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9590; NoVLX-NEXT: kmovw %k0, %eax 9591; NoVLX-NEXT: vzeroupper 9592; NoVLX-NEXT: retq 9593entry: 9594 %0 = bitcast <8 x i64> %__a to <8 x i64> 9595 %load = load i64, i64* %__b 9596 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9597 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9598 %2 = icmp sgt <8 x i64> %0, %1 9599 %3 = bitcast i8 %__u to <8 x i1> 9600 %4 = and <8 x i1> %3, %2 9601 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9602 %6 = bitcast <32 x i1> %5 to i32 9603 ret i32 %6 9604} 9605 9606 9607define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9608; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: 9609; VLX: # %bb.0: # %entry 9610; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9611; VLX-NEXT: kmovq %k0, %rax 9612; VLX-NEXT: vzeroupper 9613; VLX-NEXT: retq 9614; 9615; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: 9616; NoVLX: # %bb.0: # %entry 9617; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9618; NoVLX-NEXT: kmovw %k0, %eax 9619; NoVLX-NEXT: movzwl %ax, %eax 9620; NoVLX-NEXT: vzeroupper 9621; NoVLX-NEXT: retq 9622entry: 9623 %0 = bitcast <8 x i64> %__a to <8 x i64> 9624 %1 = bitcast <8 x i64> %__b to <8 x i64> 9625 %2 = icmp sgt <8 x i64> %0, %1 9626 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 
3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9627 %4 = bitcast <64 x i1> %3 to i64 9628 ret i64 %4 9629} 9630 9631define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9632; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: 9633; VLX: # %bb.0: # %entry 9634; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9635; VLX-NEXT: kmovq %k0, %rax 9636; VLX-NEXT: vzeroupper 9637; VLX-NEXT: retq 9638; 9639; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: 9640; NoVLX: # %bb.0: # %entry 9641; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9642; NoVLX-NEXT: kmovw %k0, %eax 9643; NoVLX-NEXT: movzwl %ax, %eax 9644; NoVLX-NEXT: vzeroupper 9645; NoVLX-NEXT: retq 9646entry: 9647 %0 = bitcast <8 x i64> %__a to <8 x i64> 9648 %load = load <8 x i64>, <8 x i64>* %__b 9649 %1 = bitcast <8 x i64> %load to <8 x i64> 9650 %2 = icmp sgt <8 x i64> %0, %1 9651 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9652 %4 = bitcast <64 x i1> %3 to i64 9653 ret i64 %4 9654} 9655 9656define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext 
%__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9657; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: 9658; VLX: # %bb.0: # %entry 9659; VLX-NEXT: kmovd %edi, %k1 9660; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9661; VLX-NEXT: kmovq %k0, %rax 9662; VLX-NEXT: vzeroupper 9663; VLX-NEXT: retq 9664; 9665; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: 9666; NoVLX: # %bb.0: # %entry 9667; NoVLX-NEXT: kmovw %edi, %k1 9668; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9669; NoVLX-NEXT: kmovw %k0, %eax 9670; NoVLX-NEXT: movzwl %ax, %eax 9671; NoVLX-NEXT: vzeroupper 9672; NoVLX-NEXT: retq 9673entry: 9674 %0 = bitcast <8 x i64> %__a to <8 x i64> 9675 %1 = bitcast <8 x i64> %__b to <8 x i64> 9676 %2 = icmp sgt <8 x i64> %0, %1 9677 %3 = bitcast i8 %__u to <8 x i1> 9678 %4 = and <8 x i1> %2, %3 9679 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9680 %6 = bitcast <64 x i1> %5 to i64 9681 ret i64 %6 9682} 9683 9684define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 9685; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: 9686; VLX: # %bb.0: # %entry 9687; VLX-NEXT: kmovd %edi, %k1 9688; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9689; VLX-NEXT: kmovq %k0, %rax 9690; VLX-NEXT: vzeroupper 9691; VLX-NEXT: retq 9692; 9693; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: 9694; NoVLX: # %bb.0: # %entry 9695; NoVLX-NEXT: kmovw %edi, %k1 9696; NoVLX-NEXT: vpcmpgtq (%rsi), 
%zmm0, %k0 {%k1} 9697; NoVLX-NEXT: kmovw %k0, %eax 9698; NoVLX-NEXT: movzwl %ax, %eax 9699; NoVLX-NEXT: vzeroupper 9700; NoVLX-NEXT: retq 9701entry: 9702 %0 = bitcast <8 x i64> %__a to <8 x i64> 9703 %load = load <8 x i64>, <8 x i64>* %__b 9704 %1 = bitcast <8 x i64> %load to <8 x i64> 9705 %2 = icmp sgt <8 x i64> %0, %1 9706 %3 = bitcast i8 %__u to <8 x i1> 9707 %4 = and <8 x i1> %2, %3 9708 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9709 %6 = bitcast <64 x i1> %5 to i64 9710 ret i64 %6 9711} 9712 9713 9714define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 9715; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9716; VLX: # %bb.0: # %entry 9717; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9718; VLX-NEXT: kmovq %k0, %rax 9719; VLX-NEXT: vzeroupper 9720; VLX-NEXT: retq 9721; 9722; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9723; NoVLX: # %bb.0: # %entry 9724; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9725; NoVLX-NEXT: kmovw %k0, %eax 9726; NoVLX-NEXT: movzwl %ax, %eax 9727; NoVLX-NEXT: vzeroupper 9728; NoVLX-NEXT: retq 9729entry: 9730 %0 = bitcast <8 x i64> %__a to <8 x i64> 9731 %load = load i64, i64* %__b 9732 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9733 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9734 %2 = icmp sgt <8 x i64> %0, %1 9735 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9736 %4 = bitcast <64 x i1> %3 to i64 9737 ret i64 %4 9738} 9739 9740define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 9741; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9742; VLX: # %bb.0: # %entry 9743; VLX-NEXT: kmovd %edi, %k1 9744; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9745; VLX-NEXT: kmovq %k0, %rax 9746; VLX-NEXT: vzeroupper 9747; VLX-NEXT: retq 9748; 9749; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9750; NoVLX: # %bb.0: # %entry 9751; NoVLX-NEXT: kmovw %edi, %k1 9752; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9753; NoVLX-NEXT: kmovw %k0, %eax 9754; NoVLX-NEXT: movzwl %ax, %eax 9755; NoVLX-NEXT: vzeroupper 9756; NoVLX-NEXT: retq 9757entry: 9758 %0 = bitcast <8 x i64> %__a to <8 x i64> 9759 %load = load i64, i64* %__b 9760 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9761 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9762 %2 = icmp sgt <8 x i64> %0, %1 9763 %3 = bitcast i8 %__u to <8 x i1> 9764 %4 = and <8 x i1> %3, %2 9765 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 
13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9766 %6 = bitcast <64 x i1> %5 to i64 9767 ret i64 %6 9768} 9769 9770 9771define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9772; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: 9773; VLX: # %bb.0: # %entry 9774; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 9775; VLX-NEXT: kmovd %k0, %eax 9776; VLX-NEXT: retq 9777; 9778; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: 9779; NoVLX: # %bb.0: # %entry 9780; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9781; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9782; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9783; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9784; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9785; NoVLX-NEXT: kmovw %k0, %eax 9786; NoVLX-NEXT: vzeroupper 9787; NoVLX-NEXT: retq 9788entry: 9789 %0 = bitcast <2 x i64> %__a to <16 x i8> 9790 %1 = bitcast <2 x i64> %__b to <16 x i8> 9791 %2 = icmp sge <16 x i8> %0, %1 9792 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9793 %4 = bitcast <32 x i1> %3 to i32 9794 ret i32 %4 9795} 9796 9797define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 9798; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: 9799; VLX: # %bb.0: # %entry 9800; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 9801; VLX-NEXT: kmovd %k0, %eax 9802; VLX-NEXT: retq 9803; 9804; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: 9805; NoVLX: # %bb.0: # %entry 9806; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 9807; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9808; NoVLX-NEXT: vpternlogq 
$15, %zmm0, %zmm0, %zmm0 9809; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9810; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9811; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9812; NoVLX-NEXT: kmovw %k0, %eax 9813; NoVLX-NEXT: vzeroupper 9814; NoVLX-NEXT: retq 9815entry: 9816 %0 = bitcast <2 x i64> %__a to <16 x i8> 9817 %load = load <2 x i64>, <2 x i64>* %__b 9818 %1 = bitcast <2 x i64> %load to <16 x i8> 9819 %2 = icmp sge <16 x i8> %0, %1 9820 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9821 %4 = bitcast <32 x i1> %3 to i32 9822 ret i32 %4 9823} 9824 9825define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9826; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: 9827; VLX: # %bb.0: # %entry 9828; VLX-NEXT: kmovd %edi, %k1 9829; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} 9830; VLX-NEXT: kmovd %k0, %eax 9831; VLX-NEXT: retq 9832; 9833; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: 9834; NoVLX: # %bb.0: # %entry 9835; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9836; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9837; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9838; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9839; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9840; NoVLX-NEXT: kmovw %k0, %eax 9841; NoVLX-NEXT: andl %edi, %eax 9842; NoVLX-NEXT: vzeroupper 9843; NoVLX-NEXT: retq 9844entry: 9845 %0 = bitcast <2 x i64> %__a to <16 x i8> 9846 %1 = bitcast <2 x i64> %__b to <16 x i8> 9847 %2 = icmp sge <16 x i8> %0, %1 9848 %3 = bitcast i16 %__u to <16 x i1> 9849 %4 = and <16 x i1> %2, %3 9850 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 
13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9851 %6 = bitcast <32 x i1> %5 to i32 9852 ret i32 %6 9853} 9854 9855define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 9856; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: 9857; VLX: # %bb.0: # %entry 9858; VLX-NEXT: kmovd %edi, %k1 9859; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} 9860; VLX-NEXT: kmovd %k0, %eax 9861; VLX-NEXT: retq 9862; 9863; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: 9864; NoVLX: # %bb.0: # %entry 9865; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 9866; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9867; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9868; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9869; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9870; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9871; NoVLX-NEXT: kmovw %k0, %eax 9872; NoVLX-NEXT: andl %edi, %eax 9873; NoVLX-NEXT: vzeroupper 9874; NoVLX-NEXT: retq 9875entry: 9876 %0 = bitcast <2 x i64> %__a to <16 x i8> 9877 %load = load <2 x i64>, <2 x i64>* %__b 9878 %1 = bitcast <2 x i64> %load to <16 x i8> 9879 %2 = icmp sge <16 x i8> %0, %1 9880 %3 = bitcast i16 %__u to <16 x i1> 9881 %4 = and <16 x i1> %2, %3 9882 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9883 %6 = bitcast <32 x i1> %5 to i32 9884 ret i32 %6 9885} 9886 9887 9888define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9889; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: 9890; VLX: # %bb.0: # %entry 9891; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 9892; VLX-NEXT: kmovq %k0, %rax 9893; VLX-NEXT: 
retq 9894; 9895; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: 9896; NoVLX: # %bb.0: # %entry 9897; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9898; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9899; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9900; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9901; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9902; NoVLX-NEXT: kmovw %k0, %eax 9903; NoVLX-NEXT: movzwl %ax, %eax 9904; NoVLX-NEXT: vzeroupper 9905; NoVLX-NEXT: retq 9906entry: 9907 %0 = bitcast <2 x i64> %__a to <16 x i8> 9908 %1 = bitcast <2 x i64> %__b to <16 x i8> 9909 %2 = icmp sge <16 x i8> %0, %1 9910 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9911 %4 = bitcast <64 x i1> %3 to i64 9912 ret i64 %4 9913} 9914 9915define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 9916; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: 9917; VLX: # %bb.0: # %entry 9918; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 9919; VLX-NEXT: kmovq %k0, %rax 9920; VLX-NEXT: retq 9921; 9922; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: 9923; NoVLX: # %bb.0: # %entry 9924; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 9925; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9926; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9927; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9928; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9929; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9930; NoVLX-NEXT: kmovw %k0, %eax 9931; NoVLX-NEXT: movzwl %ax, %eax 9932; NoVLX-NEXT: vzeroupper 9933; 
NoVLX-NEXT: retq 9934entry: 9935 %0 = bitcast <2 x i64> %__a to <16 x i8> 9936 %load = load <2 x i64>, <2 x i64>* %__b 9937 %1 = bitcast <2 x i64> %load to <16 x i8> 9938 %2 = icmp sge <16 x i8> %0, %1 9939 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9940 %4 = bitcast <64 x i1> %3 to i64 9941 ret i64 %4 9942} 9943 9944define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9945; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: 9946; VLX: # %bb.0: # %entry 9947; VLX-NEXT: kmovd %edi, %k1 9948; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} 9949; VLX-NEXT: kmovq %k0, %rax 9950; VLX-NEXT: retq 9951; 9952; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: 9953; NoVLX: # %bb.0: # %entry 9954; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9955; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9956; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9957; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9958; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9959; NoVLX-NEXT: kmovw %k0, %eax 9960; NoVLX-NEXT: andl %edi, %eax 9961; NoVLX-NEXT: vzeroupper 9962; NoVLX-NEXT: retq 9963entry: 9964 %0 = bitcast <2 x i64> %__a to <16 x i8> 9965 %1 = bitcast <2 x i64> %__b to <16 x i8> 9966 %2 = icmp sge <16 x i8> %0, %1 9967 %3 = bitcast i16 %__u to <16 x i1> 9968 %4 = and <16 x i1> %2, %3 9969 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, 
i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9970 %6 = bitcast <64 x i1> %5 to i64 9971 ret i64 %6 9972} 9973 9974define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 9975; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: 9976; VLX: # %bb.0: # %entry 9977; VLX-NEXT: kmovd %edi, %k1 9978; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} 9979; VLX-NEXT: kmovq %k0, %rax 9980; VLX-NEXT: retq 9981; 9982; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: 9983; NoVLX: # %bb.0: # %entry 9984; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 9985; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9986; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9987; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9988; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 9989; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9990; NoVLX-NEXT: kmovw %k0, %eax 9991; NoVLX-NEXT: andl %edi, %eax 9992; NoVLX-NEXT: vzeroupper 9993; NoVLX-NEXT: retq 9994entry: 9995 %0 = bitcast <2 x i64> %__a to <16 x i8> 9996 %load = load <2 x i64>, <2 x i64>* %__b 9997 %1 = bitcast <2 x i64> %load to <16 x i8> 9998 %2 = icmp sge <16 x i8> %0, %1 9999 %3 = bitcast i16 %__u to <16 x i1> 10000 %4 = and <16 x i1> %2, %3 10001 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 
23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10002 %6 = bitcast <64 x i1> %5 to i64 10003 ret i64 %6 10004} 10005 10006 10007define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10008; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: 10009; VLX: # %bb.0: # %entry 10010; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 10011; VLX-NEXT: kmovq %k0, %rax 10012; VLX-NEXT: vzeroupper 10013; VLX-NEXT: retq 10014; 10015; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: 10016; NoVLX: # %bb.0: # %entry 10017; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 10018; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10019; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 10020; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 10021; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 10022; NoVLX-NEXT: kmovw %k0, %ecx 10023; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 10024; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 10025; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10026; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10027; NoVLX-NEXT: kmovw %k0, %eax 10028; NoVLX-NEXT: shll $16, %eax 10029; NoVLX-NEXT: orl %ecx, %eax 10030; NoVLX-NEXT: vzeroupper 10031; NoVLX-NEXT: retq 10032entry: 10033 %0 = bitcast <4 x i64> %__a to <32 x i8> 10034 %1 = bitcast <4 x i64> %__b to <32 x i8> 10035 %2 = icmp sge <32 x i8> %0, %1 10036 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, 
i32 62, i32 63> 10037 %4 = bitcast <64 x i1> %3 to i64 10038 ret i64 %4 10039} 10040 10041define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 10042; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: 10043; VLX: # %bb.0: # %entry 10044; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0 10045; VLX-NEXT: kmovq %k0, %rax 10046; VLX-NEXT: vzeroupper 10047; VLX-NEXT: retq 10048; 10049; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: 10050; NoVLX: # %bb.0: # %entry 10051; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 10052; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 10053; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10054; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 10055; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 10056; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 10057; NoVLX-NEXT: kmovw %k0, %ecx 10058; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 10059; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 10060; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10061; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10062; NoVLX-NEXT: kmovw %k0, %eax 10063; NoVLX-NEXT: shll $16, %eax 10064; NoVLX-NEXT: orl %ecx, %eax 10065; NoVLX-NEXT: vzeroupper 10066; NoVLX-NEXT: retq 10067entry: 10068 %0 = bitcast <4 x i64> %__a to <32 x i8> 10069 %load = load <4 x i64>, <4 x i64>* %__b 10070 %1 = bitcast <4 x i64> %load to <32 x i8> 10071 %2 = icmp sge <32 x i8> %0, %1 10072 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10073 %4 = bitcast <64 x i1> %3 to i64 10074 ret i64 %4 10075} 10076 
; The check lines in the functions below are autogenerated (see the NOTE at the
; top of this file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.

; Masked signed >= on 32 x i8 (register operands). The compare result is ANDed
; with the i32 mask %__u, widened to 64 lanes by a shuffle whose indices 32..63
; select from zeroinitializer, and returned as an i64 bitmask.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp sge <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Same as the previous test, but the second compare operand is loaded from
; the pointer %__b.
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp sge <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Signed >= on 8 x i16 (register operands). The 8-lane result is widened to
; 16 lanes (indices 8..15 select from zeroinitializer) and returned as i16.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Signed >= on 8 x i16 with the second operand loaded from %__b; result
; widened to 16 lanes and returned as i16.
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked signed >= on 8 x i16 (register operands): the compare result is ANDed
; with the i8 mask %__u before being widened to 16 lanes and returned as i16.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked signed >= on 8 x i16 with the second operand loaded from %__b;
; result ANDed with %__u, widened to 16 lanes and returned as i16.
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Signed >= on 8 x i16 (register operands). The 8-lane result is widened to
; 32 lanes; every index past 7 selects from zeroinitializer. Returned as i32.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Signed >= on 8 x i16 with the second operand loaded from %__b; result
; widened to 32 lanes and returned as i32.
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked signed >= on 8 x i16 (register operands): the compare result is ANDed
; with the i8 mask %__u, widened to 32 lanes and returned as i32.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked signed >= on 8 x i16 with the second operand loaded from %__b;
; result ANDed with %__u, widened to 32 lanes and returned as i32.
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Signed >= on 8 x i16 (register operands). The 8-lane result is widened to
; 64 lanes; every index past 7 selects from zeroinitializer. Returned as i64.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Signed >= on 8 x i16 with the second operand loaded from %__b; result
; widened to 64 lanes and returned as i64.
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked signed >= on 8 x i16 (register operands): the compare result is ANDed
; with the i8 mask %__u, widened to 64 lanes and returned as i64.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked signed >= on 8 x i16 with the second operand loaded from %__b;
; result ANDed with %__u, widened to 64 lanes and returned as i64.
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp sge <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; ---------------------------------------------------------------------------
; Signed greater-or-equal (icmp sge) compare-to-mask tests, from
; test_vpcmpsgew_v16i1_v32i1_mask through test_vpcmpsged_v4i1_v16i1_mask_mem.
;
; Shapes exercised in this run of functions:
;   * <16 x i16> compare, result widened to a 32-lane and a 64-lane i1 vector
;   * <32 x i16> compare, result widened to a 64-lane i1 vector
;   * <4 x i32>  compare, result widened to an 8-lane and a 16-lane i1 vector
;
; Every function bitcasts its i64-vector operands to the element type, performs
; the icmp sge, pads the i1 result with lanes selected from zeroinitializer via
; shufflevector, and bitcasts the padded vector to the returned integer.
;
; Name suffixes / prefixes:
;   _mem         second operand is loaded from the pointer argument
;   _mem_b       a scalar i32 is loaded and splatted to all four lanes
;   test_masked_ the compare result is ANDed with %__u bitcast to an i1 vector
;                (only the low four lanes of %__u are used for <4 x i32>)
;
; Expected code, as recorded in the assertion lines below:
;   * with avx512vl/bw/dq enabled: one vpcmpnltw / vpcmpnltd into a k-register
;     (under {%k1} for masked variants), then a kmov to the return register
;   * with only avx512f: word compares appear as vpcmpgtw with swapped operands
;     followed by vpternlogq $15 (presumably a bitwise NOT - confirm against the
;     ISA reference), vpmovsxwd, vpslld $31 and vptestmd; dword compares appear
;     as vpcmpnltd on zmm registers followed by kshiftlw/kshiftrw $12
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
; ---------------------------------------------------------------------------
10525define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10526; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: 10527; VLX: # %bb.0: # %entry 10528; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 10529; VLX-NEXT: kmovd %k0, %eax 10530; VLX-NEXT: vzeroupper 10531; VLX-NEXT: retq 10532; 10533; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: 10534; NoVLX: # %bb.0: # %entry 10535; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10536; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10537; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10538; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10539; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10540; NoVLX-NEXT: kmovw %k0, %eax 10541; NoVLX-NEXT: vzeroupper 10542; NoVLX-NEXT: retq 10543entry: 10544 %0 = bitcast <4 x i64> %__a to <16 x i16> 10545 %1 = bitcast <4 x i64> %__b to <16 x i16> 10546 %2 = icmp sge <16 x i16> %0, %1 10547 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10548 %4 = bitcast <32 x i1> %3 to i32 10549 ret i32 %4 10550} 10551 10552define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 10553; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: 10554; VLX: # %bb.0: # %entry 10555; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 10556; VLX-NEXT: kmovd %k0, %eax 10557; VLX-NEXT: vzeroupper 10558; VLX-NEXT: retq 10559; 10560; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: 10561; NoVLX: # %bb.0: # %entry 10562; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 10563; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10564; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10565; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10566; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10567; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10568; NoVLX-NEXT: kmovw 
%k0, %eax 10569; NoVLX-NEXT: vzeroupper 10570; NoVLX-NEXT: retq 10571entry: 10572 %0 = bitcast <4 x i64> %__a to <16 x i16> 10573 %load = load <4 x i64>, <4 x i64>* %__b 10574 %1 = bitcast <4 x i64> %load to <16 x i16> 10575 %2 = icmp sge <16 x i16> %0, %1 10576 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10577 %4 = bitcast <32 x i1> %3 to i32 10578 ret i32 %4 10579} 10580 10581define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10582; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: 10583; VLX: # %bb.0: # %entry 10584; VLX-NEXT: kmovd %edi, %k1 10585; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} 10586; VLX-NEXT: kmovd %k0, %eax 10587; VLX-NEXT: vzeroupper 10588; VLX-NEXT: retq 10589; 10590; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: 10591; NoVLX: # %bb.0: # %entry 10592; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10593; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10594; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10595; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10596; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10597; NoVLX-NEXT: kmovw %k0, %eax 10598; NoVLX-NEXT: andl %edi, %eax 10599; NoVLX-NEXT: vzeroupper 10600; NoVLX-NEXT: retq 10601entry: 10602 %0 = bitcast <4 x i64> %__a to <16 x i16> 10603 %1 = bitcast <4 x i64> %__b to <16 x i16> 10604 %2 = icmp sge <16 x i16> %0, %1 10605 %3 = bitcast i16 %__u to <16 x i1> 10606 %4 = and <16 x i1> %2, %3 10607 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 
26, i32 27, i32 28, i32 29, i32 30, i32 31> 10608 %6 = bitcast <32 x i1> %5 to i32 10609 ret i32 %6 10610} 10611 10612define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 10613; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: 10614; VLX: # %bb.0: # %entry 10615; VLX-NEXT: kmovd %edi, %k1 10616; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} 10617; VLX-NEXT: kmovd %k0, %eax 10618; VLX-NEXT: vzeroupper 10619; VLX-NEXT: retq 10620; 10621; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: 10622; NoVLX: # %bb.0: # %entry 10623; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 10624; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10625; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10626; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10627; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10628; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10629; NoVLX-NEXT: kmovw %k0, %eax 10630; NoVLX-NEXT: andl %edi, %eax 10631; NoVLX-NEXT: vzeroupper 10632; NoVLX-NEXT: retq 10633entry: 10634 %0 = bitcast <4 x i64> %__a to <16 x i16> 10635 %load = load <4 x i64>, <4 x i64>* %__b 10636 %1 = bitcast <4 x i64> %load to <16 x i16> 10637 %2 = icmp sge <16 x i16> %0, %1 10638 %3 = bitcast i16 %__u to <16 x i1> 10639 %4 = and <16 x i1> %2, %3 10640 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10641 %6 = bitcast <32 x i1> %5 to i32 10642 ret i32 %6 10643} 10644 10645 10646define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10647; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: 10648; VLX: # %bb.0: # %entry 10649; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 10650; VLX-NEXT: kmovq %k0, %rax 10651; VLX-NEXT: vzeroupper 10652; VLX-NEXT: retq 
10653; 10654; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: 10655; NoVLX: # %bb.0: # %entry 10656; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10657; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10658; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10659; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10660; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10661; NoVLX-NEXT: kmovw %k0, %eax 10662; NoVLX-NEXT: movzwl %ax, %eax 10663; NoVLX-NEXT: vzeroupper 10664; NoVLX-NEXT: retq 10665entry: 10666 %0 = bitcast <4 x i64> %__a to <16 x i16> 10667 %1 = bitcast <4 x i64> %__b to <16 x i16> 10668 %2 = icmp sge <16 x i16> %0, %1 10669 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10670 %4 = bitcast <64 x i1> %3 to i64 10671 ret i64 %4 10672} 10673 10674define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 10675; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: 10676; VLX: # %bb.0: # %entry 10677; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 10678; VLX-NEXT: kmovq %k0, %rax 10679; VLX-NEXT: vzeroupper 10680; VLX-NEXT: retq 10681; 10682; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: 10683; NoVLX: # %bb.0: # %entry 10684; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 10685; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10686; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10687; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10688; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10689; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10690; NoVLX-NEXT: kmovw %k0, %eax 10691; 
NoVLX-NEXT: movzwl %ax, %eax 10692; NoVLX-NEXT: vzeroupper 10693; NoVLX-NEXT: retq 10694entry: 10695 %0 = bitcast <4 x i64> %__a to <16 x i16> 10696 %load = load <4 x i64>, <4 x i64>* %__b 10697 %1 = bitcast <4 x i64> %load to <16 x i16> 10698 %2 = icmp sge <16 x i16> %0, %1 10699 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10700 %4 = bitcast <64 x i1> %3 to i64 10701 ret i64 %4 10702} 10703 10704define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10705; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: 10706; VLX: # %bb.0: # %entry 10707; VLX-NEXT: kmovd %edi, %k1 10708; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} 10709; VLX-NEXT: kmovq %k0, %rax 10710; VLX-NEXT: vzeroupper 10711; VLX-NEXT: retq 10712; 10713; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: 10714; NoVLX: # %bb.0: # %entry 10715; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10716; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10717; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10718; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10719; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10720; NoVLX-NEXT: kmovw %k0, %eax 10721; NoVLX-NEXT: andl %edi, %eax 10722; NoVLX-NEXT: vzeroupper 10723; NoVLX-NEXT: retq 10724entry: 10725 %0 = bitcast <4 x i64> %__a to <16 x i16> 10726 %1 = bitcast <4 x i64> %__b to <16 x i16> 10727 %2 = icmp sge <16 x i16> %0, %1 10728 %3 = bitcast i16 %__u to <16 x i1> 10729 %4 = and <16 x i1> %2, %3 10730 
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10731 %6 = bitcast <64 x i1> %5 to i64 10732 ret i64 %6 10733} 10734 10735define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 10736; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: 10737; VLX: # %bb.0: # %entry 10738; VLX-NEXT: kmovd %edi, %k1 10739; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} 10740; VLX-NEXT: kmovq %k0, %rax 10741; VLX-NEXT: vzeroupper 10742; VLX-NEXT: retq 10743; 10744; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: 10745; NoVLX: # %bb.0: # %entry 10746; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 10747; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10748; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10749; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10750; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10751; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10752; NoVLX-NEXT: kmovw %k0, %eax 10753; NoVLX-NEXT: andl %edi, %eax 10754; NoVLX-NEXT: vzeroupper 10755; NoVLX-NEXT: retq 10756entry: 10757 %0 = bitcast <4 x i64> %__a to <16 x i16> 10758 %load = load <4 x i64>, <4 x i64>* %__b 10759 %1 = bitcast <4 x i64> %load to <16 x i16> 10760 %2 = icmp sge <16 x i16> %0, %1 10761 %3 = bitcast i16 %__u to <16 x i1> 10762 %4 = and <16 x i1> %2, %3 10763 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, 
i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10764 %6 = bitcast <64 x i1> %5 to i64 10765 ret i64 %6 10766} 10767 10768 10769define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 10770; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: 10771; VLX: # %bb.0: # %entry 10772; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 10773; VLX-NEXT: kmovq %k0, %rax 10774; VLX-NEXT: vzeroupper 10775; VLX-NEXT: retq 10776; 10777; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: 10778; NoVLX: # %bb.0: # %entry 10779; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 10780; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 10781; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 10782; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10783; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10784; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10785; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10786; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10787; NoVLX-NEXT: kmovw %k0, %ecx 10788; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 10789; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 10790; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10791; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10792; NoVLX-NEXT: kmovw %k0, %eax 10793; NoVLX-NEXT: shll $16, %eax 10794; NoVLX-NEXT: orl %ecx, %eax 10795; NoVLX-NEXT: vzeroupper 10796; NoVLX-NEXT: retq 10797entry: 10798 %0 = bitcast <8 x i64> %__a to <32 x i16> 10799 %1 = bitcast <8 x i64> %__b to <32 x i16> 10800 %2 = icmp sge <32 x i16> %0, %1 10801 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10802 %4 = bitcast <64 x i1> %3 to i64 10803 ret i64 %4 10804} 10805 10806define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 10807; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: 10808; VLX: # %bb.0: # %entry 10809; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0 10810; VLX-NEXT: kmovq %k0, %rax 10811; VLX-NEXT: vzeroupper 10812; VLX-NEXT: retq 10813; 10814; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: 10815; NoVLX: # %bb.0: # %entry 10816; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 10817; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 10818; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3 10819; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 10820; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 10821; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10822; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10823; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10824; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10825; NoVLX-NEXT: kmovw %k0, %ecx 10826; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 10827; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 10828; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10829; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10830; NoVLX-NEXT: kmovw %k0, %eax 10831; NoVLX-NEXT: shll $16, %eax 10832; NoVLX-NEXT: orl %ecx, %eax 10833; NoVLX-NEXT: vzeroupper 10834; NoVLX-NEXT: retq 10835entry: 10836 %0 = bitcast <8 x i64> %__a to <32 x i16> 10837 %load = load <8 x i64>, <8 x i64>* %__b 10838 %1 = bitcast <8 x i64> %load to <32 x i16> 10839 %2 = icmp sge <32 x i16> %0, %1 10840 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10841 %4 = bitcast <64 x i1> %3 to i64 10842 ret i64 %4 10843} 10844 10845define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 10846; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: 10847; VLX: # %bb.0: # %entry 10848; VLX-NEXT: kmovd %edi, %k1 10849; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} 10850; VLX-NEXT: kmovq %k0, %rax 10851; VLX-NEXT: vzeroupper 10852; VLX-NEXT: retq 10853; 10854; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: 10855; NoVLX: # %bb.0: # %entry 10856; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2 10857; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 10858; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 10859; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 10860; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 10861; NoVLX-NEXT: kmovw %k0, %eax 10862; NoVLX-NEXT: andl %edi, %eax 10863; NoVLX-NEXT: shrl $16, %edi 10864; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 10865; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 10866; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10867; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10868; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10869; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10870; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10871; NoVLX-NEXT: kmovw %k0, %ecx 10872; NoVLX-NEXT: andl %edi, %ecx 10873; NoVLX-NEXT: shll $16, %ecx 10874; NoVLX-NEXT: movzwl %ax, %eax 10875; NoVLX-NEXT: orl %ecx, %eax 10876; NoVLX-NEXT: vzeroupper 10877; NoVLX-NEXT: retq 10878entry: 10879 %0 = bitcast <8 x i64> %__a to <32 x i16> 10880 %1 = bitcast <8 x i64> %__b to <32 x i16> 
10881 %2 = icmp sge <32 x i16> %0, %1 10882 %3 = bitcast i32 %__u to <32 x i1> 10883 %4 = and <32 x i1> %2, %3 10884 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10885 %6 = bitcast <64 x i1> %5 to i64 10886 ret i64 %6 10887} 10888 10889define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 10890; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: 10891; VLX: # %bb.0: # %entry 10892; VLX-NEXT: kmovd %edi, %k1 10893; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1} 10894; VLX-NEXT: kmovq %k0, %rax 10895; VLX-NEXT: vzeroupper 10896; VLX-NEXT: retq 10897; 10898; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: 10899; NoVLX: # %bb.0: # %entry 10900; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 10901; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1 10902; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 10903; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 10904; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 10905; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 10906; NoVLX-NEXT: kmovw %k0, %eax 10907; NoVLX-NEXT: andl %edi, %eax 10908; NoVLX-NEXT: shrl $16, %edi 10909; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 10910; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1 10911; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10912; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10913; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10914; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 10915; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10916; NoVLX-NEXT: kmovw 
%k0, %ecx 10917; NoVLX-NEXT: andl %edi, %ecx 10918; NoVLX-NEXT: shll $16, %ecx 10919; NoVLX-NEXT: movzwl %ax, %eax 10920; NoVLX-NEXT: orl %ecx, %eax 10921; NoVLX-NEXT: vzeroupper 10922; NoVLX-NEXT: retq 10923entry: 10924 %0 = bitcast <8 x i64> %__a to <32 x i16> 10925 %load = load <8 x i64>, <8 x i64>* %__b 10926 %1 = bitcast <8 x i64> %load to <32 x i16> 10927 %2 = icmp sge <32 x i16> %0, %1 10928 %3 = bitcast i32 %__u to <32 x i1> 10929 %4 = and <32 x i1> %2, %3 10930 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10931 %6 = bitcast <64 x i1> %5 to i64 10932 ret i64 %6 10933} 10934 10935 10936define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10937; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: 10938; VLX: # %bb.0: # %entry 10939; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 10940; VLX-NEXT: kmovd %k0, %eax 10941; VLX-NEXT: # kill: def $al killed $al killed $eax 10942; VLX-NEXT: retq 10943; 10944; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: 10945; NoVLX: # %bb.0: # %entry 10946; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 10947; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10948; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 10949; NoVLX-NEXT: kshiftlw $12, %k0, %k0 10950; NoVLX-NEXT: kshiftrw $12, %k0, %k0 10951; NoVLX-NEXT: kmovw %k0, %eax 10952; NoVLX-NEXT: # kill: def $al killed $al killed $eax 10953; NoVLX-NEXT: vzeroupper 10954; NoVLX-NEXT: retq 10955entry: 10956 %0 = bitcast <2 x i64> %__a 
to <4 x i32> 10957 %1 = bitcast <2 x i64> %__b to <4 x i32> 10958 %2 = icmp sge <4 x i32> %0, %1 10959 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 10960 %4 = bitcast <8 x i1> %3 to i8 10961 ret i8 %4 10962} 10963 10964define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 10965; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: 10966; VLX: # %bb.0: # %entry 10967; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 10968; VLX-NEXT: kmovd %k0, %eax 10969; VLX-NEXT: # kill: def $al killed $al killed $eax 10970; VLX-NEXT: retq 10971; 10972; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: 10973; NoVLX: # %bb.0: # %entry 10974; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 10975; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 10976; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 10977; NoVLX-NEXT: kshiftlw $12, %k0, %k0 10978; NoVLX-NEXT: kshiftrw $12, %k0, %k0 10979; NoVLX-NEXT: kmovw %k0, %eax 10980; NoVLX-NEXT: # kill: def $al killed $al killed $eax 10981; NoVLX-NEXT: vzeroupper 10982; NoVLX-NEXT: retq 10983entry: 10984 %0 = bitcast <2 x i64> %__a to <4 x i32> 10985 %load = load <2 x i64>, <2 x i64>* %__b 10986 %1 = bitcast <2 x i64> %load to <4 x i32> 10987 %2 = icmp sge <4 x i32> %0, %1 10988 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 10989 %4 = bitcast <8 x i1> %3 to i8 10990 ret i8 %4 10991} 10992 10993define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10994; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: 10995; VLX: # %bb.0: # %entry 10996; VLX-NEXT: kmovd %edi, %k1 10997; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} 10998; VLX-NEXT: kmovd %k0, %eax 10999; VLX-NEXT: # kill: def $al killed $al killed $eax 11000; VLX-NEXT: retq 11001; 11002; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: 11003; NoVLX: # 
%bb.0: # %entry 11004; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11005; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11006; NoVLX-NEXT: kmovw %edi, %k1 11007; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11008; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11009; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11010; NoVLX-NEXT: kmovw %k0, %eax 11011; NoVLX-NEXT: # kill: def $al killed $al killed $eax 11012; NoVLX-NEXT: vzeroupper 11013; NoVLX-NEXT: retq 11014entry: 11015 %0 = bitcast <2 x i64> %__a to <4 x i32> 11016 %1 = bitcast <2 x i64> %__b to <4 x i32> 11017 %2 = icmp sge <4 x i32> %0, %1 11018 %3 = bitcast i8 %__u to <8 x i1> 11019 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11020 %4 = and <4 x i1> %2, %extract.i 11021 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 11022 %6 = bitcast <8 x i1> %5 to i8 11023 ret i8 %6 11024} 11025 11026define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11027; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: 11028; VLX: # %bb.0: # %entry 11029; VLX-NEXT: kmovd %edi, %k1 11030; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} 11031; VLX-NEXT: kmovd %k0, %eax 11032; VLX-NEXT: # kill: def $al killed $al killed $eax 11033; VLX-NEXT: retq 11034; 11035; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: 11036; NoVLX: # %bb.0: # %entry 11037; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11038; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 11039; NoVLX-NEXT: kmovw %edi, %k1 11040; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11041; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11042; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11043; NoVLX-NEXT: kmovw %k0, %eax 11044; NoVLX-NEXT: # kill: def $al killed $al killed $eax 11045; NoVLX-NEXT: vzeroupper 11046; NoVLX-NEXT: retq 11047entry: 11048 %0 = bitcast <2 x i64> %__a to <4 x i32> 11049 %load = load <2 x 
i64>, <2 x i64>* %__b 11050 %1 = bitcast <2 x i64> %load to <4 x i32> 11051 %2 = icmp sge <4 x i32> %0, %1 11052 %3 = bitcast i8 %__u to <8 x i1> 11053 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11054 %4 = and <4 x i1> %2, %extract.i 11055 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 11056 %6 = bitcast <8 x i1> %5 to i8 11057 ret i8 %6 11058} 11059 11060 11061define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 11062; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: 11063; VLX: # %bb.0: # %entry 11064; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 11065; VLX-NEXT: kmovd %k0, %eax 11066; VLX-NEXT: # kill: def $al killed $al killed $eax 11067; VLX-NEXT: retq 11068; 11069; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: 11070; NoVLX: # %bb.0: # %entry 11071; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11072; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 11073; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11074; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11075; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11076; NoVLX-NEXT: kmovw %k0, %eax 11077; NoVLX-NEXT: # kill: def $al killed $al killed $eax 11078; NoVLX-NEXT: vzeroupper 11079; NoVLX-NEXT: retq 11080entry: 11081 %0 = bitcast <2 x i64> %__a to <4 x i32> 11082 %load = load i32, i32* %__b 11083 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11084 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11085 %2 = icmp sge <4 x i32> %0, %1 11086 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 11087 %4 = bitcast <8 x i1> %3 to i8 11088 ret i8 %4 11089} 11090 11091define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 11092; VLX-LABEL: 
test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: 11093; VLX: # %bb.0: # %entry 11094; VLX-NEXT: kmovd %edi, %k1 11095; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} 11096; VLX-NEXT: kmovd %k0, %eax 11097; VLX-NEXT: # kill: def $al killed $al killed $eax 11098; VLX-NEXT: retq 11099; 11100; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: 11101; NoVLX: # %bb.0: # %entry 11102; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11103; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 11104; NoVLX-NEXT: kmovw %edi, %k1 11105; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11106; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11107; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11108; NoVLX-NEXT: kmovw %k0, %eax 11109; NoVLX-NEXT: # kill: def $al killed $al killed $eax 11110; NoVLX-NEXT: vzeroupper 11111; NoVLX-NEXT: retq 11112entry: 11113 %0 = bitcast <2 x i64> %__a to <4 x i32> 11114 %load = load i32, i32* %__b 11115 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11116 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11117 %2 = icmp sge <4 x i32> %0, %1 11118 %3 = bitcast i8 %__u to <8 x i1> 11119 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11120 %4 = and <4 x i1> %extract.i, %2 11121 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 11122 %6 = bitcast <8 x i1> %5 to i8 11123 ret i8 %6 11124} 11125 11126 11127define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11128; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: 11129; VLX: # %bb.0: # %entry 11130; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 11131; VLX-NEXT: kmovd %k0, %eax 11132; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11133; VLX-NEXT: retq 11134; 11135; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: 11136; NoVLX: # %bb.0: # %entry 11137; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11138; NoVLX-NEXT: # kill: 
def $xmm0 killed $xmm0 def $zmm0 11139; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11140; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11141; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11142; NoVLX-NEXT: kmovw %k0, %eax 11143; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11144; NoVLX-NEXT: vzeroupper 11145; NoVLX-NEXT: retq 11146entry: 11147 %0 = bitcast <2 x i64> %__a to <4 x i32> 11148 %1 = bitcast <2 x i64> %__b to <4 x i32> 11149 %2 = icmp sge <4 x i32> %0, %1 11150 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11151 %4 = bitcast <16 x i1> %3 to i16 11152 ret i16 %4 11153} 11154 11155define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11156; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: 11157; VLX: # %bb.0: # %entry 11158; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 11159; VLX-NEXT: kmovd %k0, %eax 11160; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11161; VLX-NEXT: retq 11162; 11163; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: 11164; NoVLX: # %bb.0: # %entry 11165; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11166; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 11167; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11168; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11169; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11170; NoVLX-NEXT: kmovw %k0, %eax 11171; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11172; NoVLX-NEXT: vzeroupper 11173; NoVLX-NEXT: retq 11174entry: 11175 %0 = bitcast <2 x i64> %__a to <4 x i32> 11176 %load = load <2 x i64>, <2 x i64>* %__b 11177 %1 = bitcast <2 x i64> %load to <4 x i32> 11178 %2 = icmp sge <4 x i32> %0, %1 11179 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11180 %4 = bitcast <16 x i1> %3 to i16 11181 ret i16 %4 11182} 11183 11184define 
zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11185; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: 11186; VLX: # %bb.0: # %entry 11187; VLX-NEXT: kmovd %edi, %k1 11188; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} 11189; VLX-NEXT: kmovd %k0, %eax 11190; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11191; VLX-NEXT: retq 11192; 11193; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: 11194; NoVLX: # %bb.0: # %entry 11195; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11196; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11197; NoVLX-NEXT: kmovw %edi, %k1 11198; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11199; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11200; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11201; NoVLX-NEXT: kmovw %k0, %eax 11202; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11203; NoVLX-NEXT: vzeroupper 11204; NoVLX-NEXT: retq 11205entry: 11206 %0 = bitcast <2 x i64> %__a to <4 x i32> 11207 %1 = bitcast <2 x i64> %__b to <4 x i32> 11208 %2 = icmp sge <4 x i32> %0, %1 11209 %3 = bitcast i8 %__u to <8 x i1> 11210 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11211 %4 = and <4 x i1> %2, %extract.i 11212 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11213 %6 = bitcast <16 x i1> %5 to i16 11214 ret i16 %6 11215} 11216 11217define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11218; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: 11219; VLX: # %bb.0: # %entry 11220; VLX-NEXT: kmovd %edi, %k1 11221; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} 11222; VLX-NEXT: kmovd %k0, %eax 11223; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11224; VLX-NEXT: retq 11225; 11226; NoVLX-LABEL: 
test_masked_vpcmpsged_v4i1_v16i1_mask_mem: 11227; NoVLX: # %bb.0: # %entry 11228; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11229; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 11230; NoVLX-NEXT: kmovw %edi, %k1 11231; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11232; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11233; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11234; NoVLX-NEXT: kmovw %k0, %eax 11235; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11236; NoVLX-NEXT: vzeroupper 11237; NoVLX-NEXT: retq 11238entry: 11239 %0 = bitcast <2 x i64> %__a to <4 x i32> 11240 %load = load <2 x i64>, <2 x i64>* %__b 11241 %1 = bitcast <2 x i64> %load to <4 x i32> 11242 %2 = icmp sge <4 x i32> %0, %1 11243 %3 = bitcast i8 %__u to <8 x i1> 11244 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11245 %4 = and <4 x i1> %2, %extract.i 11246 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11247 %6 = bitcast <16 x i1> %5 to i16 11248 ret i16 %6 11249} 11250 11251 11252define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 11253; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: 11254; VLX: # %bb.0: # %entry 11255; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 11256; VLX-NEXT: kmovd %k0, %eax 11257; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11258; VLX-NEXT: retq 11259; 11260; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: 11261; NoVLX: # %bb.0: # %entry 11262; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11263; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 11264; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11265; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11266; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11267; NoVLX-NEXT: kmovw %k0, %eax 11268; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11269; NoVLX-NEXT: vzeroupper 11270; NoVLX-NEXT: retq 11271entry: 11272 %0 = bitcast 
<2 x i64> %__a to <4 x i32> 11273 %load = load i32, i32* %__b 11274 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11275 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11276 %2 = icmp sge <4 x i32> %0, %1 11277 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11278 %4 = bitcast <16 x i1> %3 to i16 11279 ret i16 %4 11280} 11281 11282define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 11283; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: 11284; VLX: # %bb.0: # %entry 11285; VLX-NEXT: kmovd %edi, %k1 11286; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} 11287; VLX-NEXT: kmovd %k0, %eax 11288; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11289; VLX-NEXT: retq 11290; 11291; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: 11292; NoVLX: # %bb.0: # %entry 11293; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11294; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 11295; NoVLX-NEXT: kmovw %edi, %k1 11296; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11297; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11298; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11299; NoVLX-NEXT: kmovw %k0, %eax 11300; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11301; NoVLX-NEXT: vzeroupper 11302; NoVLX-NEXT: retq 11303entry: 11304 %0 = bitcast <2 x i64> %__a to <4 x i32> 11305 %load = load i32, i32* %__b 11306 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11307 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11308 %2 = icmp sge <4 x i32> %0, %1 11309 %3 = bitcast i8 %__u to <8 x i1> 11310 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11311 %4 = and <4 x i1> %extract.i, %2 11312 %5 = shufflevector <4 x i1> %4, <4 x i1> 
zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11313 %6 = bitcast <16 x i1> %5 to i16 11314 ret i16 %6 11315} 11316 11317 11318define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11319; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: 11320; VLX: # %bb.0: # %entry 11321; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 11322; VLX-NEXT: kmovd %k0, %eax 11323; VLX-NEXT: retq 11324; 11325; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: 11326; NoVLX: # %bb.0: # %entry 11327; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11328; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11329; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11330; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11331; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11332; NoVLX-NEXT: kmovw %k0, %eax 11333; NoVLX-NEXT: vzeroupper 11334; NoVLX-NEXT: retq 11335entry: 11336 %0 = bitcast <2 x i64> %__a to <4 x i32> 11337 %1 = bitcast <2 x i64> %__b to <4 x i32> 11338 %2 = icmp sge <4 x i32> %0, %1 11339 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11340 %4 = bitcast <32 x i1> %3 to i32 11341 ret i32 %4 11342} 11343 11344define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11345; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: 11346; VLX: # %bb.0: # %entry 11347; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 11348; VLX-NEXT: kmovd %k0, %eax 11349; VLX-NEXT: retq 11350; 11351; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: 11352; NoVLX: # %bb.0: # %entry 11353; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11354; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 11355; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11356; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 11357; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11358; NoVLX-NEXT: kmovw %k0, %eax 11359; NoVLX-NEXT: vzeroupper 11360; NoVLX-NEXT: retq 11361entry: 11362 %0 = bitcast <2 x i64> %__a to <4 x i32> 11363 %load = load <2 x i64>, <2 x i64>* %__b 11364 %1 = bitcast <2 x i64> %load to <4 x i32> 11365 %2 = icmp sge <4 x i32> %0, %1 11366 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11367 %4 = bitcast <32 x i1> %3 to i32 11368 ret i32 %4 11369} 11370 11371define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11372; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: 11373; VLX: # %bb.0: # %entry 11374; VLX-NEXT: kmovd %edi, %k1 11375; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} 11376; VLX-NEXT: kmovd %k0, %eax 11377; VLX-NEXT: retq 11378; 11379; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: 11380; NoVLX: # %bb.0: # %entry 11381; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11382; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11383; NoVLX-NEXT: kmovw %edi, %k1 11384; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11385; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11386; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11387; NoVLX-NEXT: kmovw %k0, %eax 11388; NoVLX-NEXT: vzeroupper 11389; NoVLX-NEXT: retq 11390entry: 11391 %0 = bitcast <2 x i64> %__a to <4 x i32> 11392 %1 = bitcast <2 x i64> %__b to <4 x i32> 11393 %2 = icmp sge <4 x i32> %0, %1 11394 %3 = bitcast i8 %__u to <8 x i1> 11395 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11396 %4 = and <4 x i1> %2, %extract.i 11397 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11398 %6 = bitcast <32 x i1> %5 to i32 11399 ret i32 %6 11400} 11401 11402define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11403; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: 11404; VLX: # %bb.0: # %entry 11405; VLX-NEXT: kmovd %edi, %k1 11406; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} 11407; VLX-NEXT: kmovd %k0, %eax 11408; VLX-NEXT: retq 11409; 11410; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: 11411; NoVLX: # %bb.0: # %entry 11412; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11413; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 11414; NoVLX-NEXT: kmovw %edi, %k1 11415; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11416; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11417; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11418; NoVLX-NEXT: kmovw %k0, %eax 11419; NoVLX-NEXT: vzeroupper 11420; NoVLX-NEXT: retq 11421entry: 11422 %0 = bitcast <2 x i64> %__a to <4 x i32> 11423 %load = load <2 x i64>, <2 x i64>* %__b 11424 %1 = bitcast <2 x i64> %load to <4 x i32> 11425 %2 = icmp sge <4 x i32> %0, %1 11426 %3 = bitcast i8 %__u to <8 x i1> 11427 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11428 %4 = and <4 x i1> %2, %extract.i 11429 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11430 %6 = bitcast <32 x i1> %5 to i32 11431 ret i32 %6 11432} 11433 11434 11435define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 11436; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: 
11437; VLX: # %bb.0: # %entry 11438; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 11439; VLX-NEXT: kmovd %k0, %eax 11440; VLX-NEXT: retq 11441; 11442; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: 11443; NoVLX: # %bb.0: # %entry 11444; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11445; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 11446; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11447; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11448; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11449; NoVLX-NEXT: kmovw %k0, %eax 11450; NoVLX-NEXT: vzeroupper 11451; NoVLX-NEXT: retq 11452entry: 11453 %0 = bitcast <2 x i64> %__a to <4 x i32> 11454 %load = load i32, i32* %__b 11455 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11456 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11457 %2 = icmp sge <4 x i32> %0, %1 11458 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11459 %4 = bitcast <32 x i1> %3 to i32 11460 ret i32 %4 11461} 11462 11463define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 11464; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: 11465; VLX: # %bb.0: # %entry 11466; VLX-NEXT: kmovd %edi, %k1 11467; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} 11468; VLX-NEXT: kmovd %k0, %eax 11469; VLX-NEXT: retq 11470; 11471; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: 11472; NoVLX: # %bb.0: # %entry 11473; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11474; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 11475; NoVLX-NEXT: kmovw %edi, %k1 11476; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11477; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11478; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11479; NoVLX-NEXT: kmovw 
%k0, %eax 11480; NoVLX-NEXT: vzeroupper 11481; NoVLX-NEXT: retq 11482entry: 11483 %0 = bitcast <2 x i64> %__a to <4 x i32> 11484 %load = load i32, i32* %__b 11485 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11486 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11487 %2 = icmp sge <4 x i32> %0, %1 11488 %3 = bitcast i8 %__u to <8 x i1> 11489 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11490 %4 = and <4 x i1> %extract.i, %2 11491 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11492 %6 = bitcast <32 x i1> %5 to i32 11493 ret i32 %6 11494} 11495 11496 11497define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11498; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: 11499; VLX: # %bb.0: # %entry 11500; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 11501; VLX-NEXT: kmovq %k0, %rax 11502; VLX-NEXT: retq 11503; 11504; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: 11505; NoVLX: # %bb.0: # %entry 11506; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11507; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11508; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11509; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11510; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11511; NoVLX-NEXT: kmovw %k0, %eax 11512; NoVLX-NEXT: movzwl %ax, %eax 11513; NoVLX-NEXT: vzeroupper 11514; NoVLX-NEXT: retq 11515entry: 11516 %0 = bitcast <2 x i64> %__a to <4 x i32> 11517 %1 = bitcast <2 x i64> %__b to <4 x i32> 11518 %2 = icmp sge <4 x i32> %0, %1 11519 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11520 %4 = bitcast <64 x i1> %3 to i64 11521 ret i64 %4 11522} 11523 11524define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11525; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: 11526; VLX: # %bb.0: # %entry 11527; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 11528; VLX-NEXT: kmovq %k0, %rax 11529; VLX-NEXT: retq 11530; 11531; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: 11532; NoVLX: # %bb.0: # %entry 11533; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11534; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 11535; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11536; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11537; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11538; NoVLX-NEXT: kmovw %k0, %eax 11539; NoVLX-NEXT: movzwl %ax, %eax 11540; NoVLX-NEXT: vzeroupper 11541; NoVLX-NEXT: retq 11542entry: 11543 %0 = bitcast <2 x i64> %__a to <4 x i32> 11544 %load = load <2 x i64>, <2 x i64>* %__b 11545 %1 = bitcast <2 x i64> %load to <4 x i32> 11546 %2 = icmp sge <4 x i32> %0, %1 11547 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11548 %4 = bitcast <64 x i1> %3 to i64 11549 ret i64 %4 11550} 11551 11552define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 
zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11553; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: 11554; VLX: # %bb.0: # %entry 11555; VLX-NEXT: kmovd %edi, %k1 11556; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} 11557; VLX-NEXT: kmovq %k0, %rax 11558; VLX-NEXT: retq 11559; 11560; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: 11561; NoVLX: # %bb.0: # %entry 11562; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11563; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11564; NoVLX-NEXT: kmovw %edi, %k1 11565; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11566; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11567; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11568; NoVLX-NEXT: kmovw %k0, %eax 11569; NoVLX-NEXT: movzwl %ax, %eax 11570; NoVLX-NEXT: vzeroupper 11571; NoVLX-NEXT: retq 11572entry: 11573 %0 = bitcast <2 x i64> %__a to <4 x i32> 11574 %1 = bitcast <2 x i64> %__b to <4 x i32> 11575 %2 = icmp sge <4 x i32> %0, %1 11576 %3 = bitcast i8 %__u to <8 x i1> 11577 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11578 %4 = and <4 x i1> %2, %extract.i 11579 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11580 %6 = bitcast <64 x i1> %5 to i64 11581 ret i64 %6 11582} 11583 11584define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 11585; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: 11586; VLX: # %bb.0: # %entry 11587; VLX-NEXT: kmovd %edi, %k1 11588; 
VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} 11589; VLX-NEXT: kmovq %k0, %rax 11590; VLX-NEXT: retq 11591; 11592; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: 11593; NoVLX: # %bb.0: # %entry 11594; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11595; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 11596; NoVLX-NEXT: kmovw %edi, %k1 11597; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11598; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11599; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11600; NoVLX-NEXT: kmovw %k0, %eax 11601; NoVLX-NEXT: movzwl %ax, %eax 11602; NoVLX-NEXT: vzeroupper 11603; NoVLX-NEXT: retq 11604entry: 11605 %0 = bitcast <2 x i64> %__a to <4 x i32> 11606 %load = load <2 x i64>, <2 x i64>* %__b 11607 %1 = bitcast <2 x i64> %load to <4 x i32> 11608 %2 = icmp sge <4 x i32> %0, %1 11609 %3 = bitcast i8 %__u to <8 x i1> 11610 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11611 %4 = and <4 x i1> %2, %extract.i 11612 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11613 %6 = bitcast <64 x i1> %5 to i64 11614 ret i64 %6 11615} 11616 11617 11618define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 11619; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: 11620; VLX: # %bb.0: # %entry 11621; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 11622; VLX-NEXT: kmovq %k0, %rax 11623; VLX-NEXT: retq 11624; 11625; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: 11626; NoVLX: # %bb.0: # %entry 11627; NoVLX-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 11628; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 11629; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11630; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11631; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11632; NoVLX-NEXT: kmovw %k0, %eax 11633; NoVLX-NEXT: movzwl %ax, %eax 11634; NoVLX-NEXT: vzeroupper 11635; NoVLX-NEXT: retq 11636entry: 11637 %0 = bitcast <2 x i64> %__a to <4 x i32> 11638 %load = load i32, i32* %__b 11639 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11640 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11641 %2 = icmp sge <4 x i32> %0, %1 11642 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11643 %4 = bitcast <64 x i1> %3 to i64 11644 ret i64 %4 11645} 11646 11647define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 11648; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: 11649; VLX: # %bb.0: # %entry 11650; VLX-NEXT: kmovd %edi, %k1 11651; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} 11652; VLX-NEXT: kmovq %k0, %rax 11653; VLX-NEXT: retq 11654; 11655; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: 11656; NoVLX: # %bb.0: # %entry 11657; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11658; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 11659; NoVLX-NEXT: kmovw %edi, %k1 11660; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11661; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11662; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11663; NoVLX-NEXT: kmovw %k0, 
%eax 11664; NoVLX-NEXT: movzwl %ax, %eax 11665; NoVLX-NEXT: vzeroupper 11666; NoVLX-NEXT: retq 11667entry: 11668 %0 = bitcast <2 x i64> %__a to <4 x i32> 11669 %load = load i32, i32* %__b 11670 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11671 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11672 %2 = icmp sge <4 x i32> %0, %1 11673 %3 = bitcast i8 %__u to <8 x i1> 11674 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11675 %4 = and <4 x i1> %extract.i, %2 11676 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11677 %6 = bitcast <64 x i1> %5 to i64 11678 ret i64 %6 11679} 11680 11681 11682define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11683; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: 11684; VLX: # %bb.0: # %entry 11685; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 11686; VLX-NEXT: kmovd %k0, %eax 11687; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11688; VLX-NEXT: vzeroupper 11689; VLX-NEXT: retq 11690; 11691; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: 11692; NoVLX: # %bb.0: # %entry 11693; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11694; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11695; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11696; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11697; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11698; NoVLX-NEXT: kmovw %k0, %eax 11699; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11700; NoVLX-NEXT: vzeroupper 
11701; NoVLX-NEXT: retq 11702entry: 11703 %0 = bitcast <4 x i64> %__a to <8 x i32> 11704 %1 = bitcast <4 x i64> %__b to <8 x i32> 11705 %2 = icmp sge <8 x i32> %0, %1 11706 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11707 %4 = bitcast <16 x i1> %3 to i16 11708 ret i16 %4 11709} 11710 11711define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 11712; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: 11713; VLX: # %bb.0: # %entry 11714; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 11715; VLX-NEXT: kmovd %k0, %eax 11716; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11717; VLX-NEXT: vzeroupper 11718; VLX-NEXT: retq 11719; 11720; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: 11721; NoVLX: # %bb.0: # %entry 11722; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11723; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 11724; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11725; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11726; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11727; NoVLX-NEXT: kmovw %k0, %eax 11728; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11729; NoVLX-NEXT: vzeroupper 11730; NoVLX-NEXT: retq 11731entry: 11732 %0 = bitcast <4 x i64> %__a to <8 x i32> 11733 %load = load <4 x i64>, <4 x i64>* %__b 11734 %1 = bitcast <4 x i64> %load to <8 x i32> 11735 %2 = icmp sge <8 x i32> %0, %1 11736 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11737 %4 = bitcast <16 x i1> %3 to i16 11738 ret i16 %4 11739} 11740 11741define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11742; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: 11743; VLX: # %bb.0: # %entry 11744; VLX-NEXT: kmovd %edi, %k1 11745; 
VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x i32>
  %1 = bitcast <4 x i64> %__b to <8 x i32>
  %2 = icmp sge <8 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked signed >= on <8 x i32>, right-hand side loaded from %__b. The compare
; is ANDed with the bits of %__u, padded with zero lanes to 16 (shuffle indices
; 8-15 pick from the zeroinitializer operand) and returned as i16.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %load to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Signed >= on <8 x i32> against a scalar i32 loaded from %__b and splatted to
; all 8 lanes. The 8 result bits are padded with zero lanes to 16 and returned
; as i16.
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Masked form of the splatted-scalar compare above: the <8 x i1> view of %__u
; is ANDed with the compare before it is padded to 16 lanes and returned as i16.
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Signed >= on <8 x i32>, both operands passed by value. The 8 result bits are
; padded with zero lanes to 32 (every shuffle index >= 8 picks a zeroinitializer
; lane) and returned as i32.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %rhs = bitcast <4 x i64> %__b to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <8 x i32>, right-hand side loaded from %__b. Result padded with
; zero lanes to 32 and returned as i32.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %load to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked signed >= on <8 x i32>, both operands passed by value. The compare is
; ANDed with the bits of %__u, padded with zero lanes to 32 and returned as i32.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %rhs = bitcast <4 x i64> %__b to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked signed >= on <8 x i32>, right-hand side loaded from %__b. The compare
; is ANDed with the bits of %__u, padded with zero lanes to 32 and returned as
; i32.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %load to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <8 x i32> against a scalar i32 loaded from %__b and splatted to
; all 8 lanes. Result padded with zero lanes to 32 and returned as i32.
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked form of the splatted-scalar compare above: the <8 x i1> view of %__u
; is ANDed with the compare before it is padded to 32 lanes and returned as i32.
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <8 x i32>, both operands passed by value. The 8 result bits are
; padded with zero lanes to 64 and returned as i64.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %rhs = bitcast <4 x i64> %__b to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <8 x i32>, right-hand side loaded from %__b. Result padded with
; zero lanes to 64 and returned as i64.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %load to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked signed >= on <8 x i32>, both operands passed by value. The compare is
; ANDed with the bits of %__u, padded with zero lanes to 64 and returned as i64.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %rhs = bitcast <4 x i64> %__b to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked signed >= on <8 x i32>, right-hand side loaded from %__b. The compare
; is ANDed with the bits of %__u, padded with zero lanes to 64 and returned as
; i64.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load <4 x i64>, <4 x i64>* %__b
  %rhs = bitcast <4 x i64> %load to <8 x i32>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <8 x i32> against a scalar i32 loaded from %__b and splatted to
; all 8 lanes. Result padded with zero lanes to 64 and returned as i64.
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked form of the splatted-scalar compare above: the <8 x i1> view of %__u
; is ANDed with the compare before it is padded to 64 lanes and returned as i64.
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <8 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <8 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <8 x i32> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %masked = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %masked, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <16 x i32>, both operands passed by value. The 16 result bits
; are padded with zero lanes to 32 (shuffle indices 16-31 pick from the
; zeroinitializer operand) and returned as i32.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %rhs = bitcast <8 x i64> %__b to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <16 x i32>, right-hand side loaded from %__b. Result padded with
; zero lanes to 32 and returned as i32.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %load to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked signed >= on <16 x i32>, both operands passed by value. The compare is
; ANDed with the bits of %__u, padded with zero lanes to 32 and returned as i32.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %rhs = bitcast <8 x i64> %__b to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked signed >= on <16 x i32>, right-hand side loaded from %__b. The compare
; is ANDed with the bits of %__u, padded with zero lanes to 32 and returned as
; i32.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %load to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <16 x i32> against a scalar i32 loaded from %__b and splatted to
; all 16 lanes. Result padded with zero lanes to 32 and returned as i32.
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked form of the splatted-scalar compare above: the <16 x i1> view of %__u
; is ANDed with the compare before it is padded to 32 lanes and returned as i32.
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %mask, %cmp
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Signed >= on <16 x i32>, both operands passed by value. The 16 result bits
; are padded with zero lanes to 64 (every shuffle index >= 16 picks a
; zeroinitializer lane) and returned as i64.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %rhs = bitcast <8 x i64> %__b to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <16 x i32>, right-hand side loaded from %__b. Result padded with
; zero lanes to 64 and returned as i64.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %load to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked signed >= on <16 x i32>, both operands passed by value. The compare is
; ANDed with the bits of %__u, padded with zero lanes to 64 and returned as i64.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %rhs = bitcast <8 x i64> %__b to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked signed >= on <16 x i32>, right-hand side loaded from %__b. The compare
; is ANDed with the bits of %__u, padded with zero lanes to 64 and returned as
; i64.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load <8 x i64>, <8 x i64>* %__b
  %rhs = bitcast <8 x i64> %load to <16 x i32>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <16 x i32> against a scalar i32 loaded from %__b and splatted to
; all 16 lanes. Result padded with zero lanes to 64 and returned as i64.
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked form of the splatted-scalar compare above: the <16 x i1> view of %__u
; is ANDed with the compare before it is padded to 64 lanes and returned as i64.
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <16 x i32>
  %load = load i32, i32* %__b
  %vec = insertelement <16 x i32> undef, i32 %load, i32 0
  %rhs = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp sge <16 x i32> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %masked = and <16 x i1> %mask, %cmp
  %wide = shufflevector <16 x i1> %masked, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed >= on <2 x i64>, both operands passed by value. The 2 result bits are
; padded with zero lanes to 4 (shuffle indices 2 and 3 pick from the
; zeroinitializer operand) and returned as i4.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %rhs = bitcast <2 x i64> %__b to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %bits = bitcast <4 x i1> %wide to i4
  ret i4 %bits
}

; Signed >= on <2 x i64>, right-hand side loaded from %__b. Result padded with
; zero lanes to 4 and returned as i4.
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %rhs = bitcast <2 x i64> %load to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %bits = bitcast <4 x i1> %wide to i4
  ret i4 %bits
}

define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b)
local_unnamed_addr { 12621; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: 12622; VLX: # %bb.0: # %entry 12623; VLX-NEXT: kmovd %edi, %k1 12624; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 12625; VLX-NEXT: kmovb %k0, %eax 12626; VLX-NEXT: retq 12627; 12628; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: 12629; NoVLX: # %bb.0: # %entry 12630; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12631; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12632; NoVLX-NEXT: kmovw %edi, %k1 12633; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12634; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12635; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12636; NoVLX-NEXT: kmovw %k0, %eax 12637; NoVLX-NEXT: andl $3, %eax 12638; NoVLX-NEXT: vzeroupper 12639; NoVLX-NEXT: retq 12640entry: 12641 %0 = bitcast <2 x i64> %__a to <2 x i64> 12642 %1 = bitcast <2 x i64> %__b to <2 x i64> 12643 %2 = icmp sge <2 x i64> %0, %1 12644 %3 = bitcast i8 %__u to <8 x i1> 12645 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12646 %4 = and <2 x i1> %2, %extract.i 12647 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12648 %6 = bitcast <4 x i1> %5 to i4 12649 ret i4 %6 12650} 12651 12652define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 12653; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: 12654; VLX: # %bb.0: # %entry 12655; VLX-NEXT: kmovd %edi, %k1 12656; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12657; VLX-NEXT: kmovb %k0, %eax 12658; VLX-NEXT: retq 12659; 12660; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: 12661; NoVLX: # %bb.0: # %entry 12662; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12663; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12664; NoVLX-NEXT: kmovw %edi, %k1 12665; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12666; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12667; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12668; NoVLX-NEXT: kmovw 
%k0, %eax 12669; NoVLX-NEXT: andl $3, %eax 12670; NoVLX-NEXT: vzeroupper 12671; NoVLX-NEXT: retq 12672entry: 12673 %0 = bitcast <2 x i64> %__a to <2 x i64> 12674 %load = load <2 x i64>, <2 x i64>* %__b 12675 %1 = bitcast <2 x i64> %load to <2 x i64> 12676 %2 = icmp sge <2 x i64> %0, %1 12677 %3 = bitcast i8 %__u to <8 x i1> 12678 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12679 %4 = and <2 x i1> %2, %extract.i 12680 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12681 %6 = bitcast <4 x i1> %5 to i4 12682 ret i4 %6 12683} 12684 12685 12686define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 12687; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12688; VLX: # %bb.0: # %entry 12689; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 12690; VLX-NEXT: kmovb %k0, %eax 12691; VLX-NEXT: retq 12692; 12693; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12694; NoVLX: # %bb.0: # %entry 12695; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12696; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 12697; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12698; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12699; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12700; NoVLX-NEXT: kmovw %k0, %eax 12701; NoVLX-NEXT: andl $3, %eax 12702; NoVLX-NEXT: vzeroupper 12703; NoVLX-NEXT: retq 12704entry: 12705 %0 = bitcast <2 x i64> %__a to <2 x i64> 12706 %load = load i64, i64* %__b 12707 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12708 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12709 %2 = icmp sge <2 x i64> %0, %1 12710 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12711 %4 = bitcast <4 x i1> %3 to i4 12712 ret i4 %4 12713} 12714 12715define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 12716; VLX-LABEL: 
test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12717; VLX: # %bb.0: # %entry 12718; VLX-NEXT: kmovd %edi, %k1 12719; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 12720; VLX-NEXT: kmovb %k0, %eax 12721; VLX-NEXT: retq 12722; 12723; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12724; NoVLX: # %bb.0: # %entry 12725; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12726; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 12727; NoVLX-NEXT: kmovw %edi, %k1 12728; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12729; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12730; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12731; NoVLX-NEXT: kmovw %k0, %eax 12732; NoVLX-NEXT: andl $3, %eax 12733; NoVLX-NEXT: vzeroupper 12734; NoVLX-NEXT: retq 12735entry: 12736 %0 = bitcast <2 x i64> %__a to <2 x i64> 12737 %load = load i64, i64* %__b 12738 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12739 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12740 %2 = icmp sge <2 x i64> %0, %1 12741 %3 = bitcast i8 %__u to <8 x i1> 12742 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12743 %4 = and <2 x i1> %extract.i, %2 12744 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12745 %6 = bitcast <4 x i1> %5 to i4 12746 ret i4 %6 12747} 12748 12749 12750define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12751; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: 12752; VLX: # %bb.0: # %entry 12753; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12754; VLX-NEXT: kmovd %k0, %eax 12755; VLX-NEXT: # kill: def $al killed $al killed $eax 12756; VLX-NEXT: retq 12757; 12758; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: 12759; NoVLX: # %bb.0: # %entry 12760; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12761; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12762; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12763; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12764; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 12765; NoVLX-NEXT: kmovw %k0, %eax 12766; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12767; NoVLX-NEXT: vzeroupper 12768; NoVLX-NEXT: retq 12769entry: 12770 %0 = bitcast <2 x i64> %__a to <2 x i64> 12771 %1 = bitcast <2 x i64> %__b to <2 x i64> 12772 %2 = icmp sge <2 x i64> %0, %1 12773 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12774 %4 = bitcast <8 x i1> %3 to i8 12775 ret i8 %4 12776} 12777 12778define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 12779; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: 12780; VLX: # %bb.0: # %entry 12781; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12782; VLX-NEXT: kmovd %k0, %eax 12783; VLX-NEXT: # kill: def $al killed $al killed $eax 12784; VLX-NEXT: retq 12785; 12786; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: 12787; NoVLX: # %bb.0: # %entry 12788; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12789; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12790; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12791; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12792; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12793; NoVLX-NEXT: kmovw %k0, %eax 12794; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12795; NoVLX-NEXT: vzeroupper 12796; NoVLX-NEXT: retq 12797entry: 12798 %0 = bitcast <2 x i64> %__a to <2 x i64> 12799 %load = load <2 x i64>, <2 x i64>* %__b 12800 %1 = bitcast <2 x i64> %load to <2 x i64> 12801 %2 = icmp sge <2 x i64> %0, %1 12802 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12803 %4 = bitcast <8 x i1> %3 to i8 12804 ret i8 %4 12805} 12806 12807define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12808; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: 12809; VLX: # %bb.0: # %entry 12810; VLX-NEXT: kmovd %edi, %k1 12811; VLX-NEXT: vpcmpnltq 
%xmm1, %xmm0, %k0 {%k1} 12812; VLX-NEXT: kmovd %k0, %eax 12813; VLX-NEXT: # kill: def $al killed $al killed $eax 12814; VLX-NEXT: retq 12815; 12816; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: 12817; NoVLX: # %bb.0: # %entry 12818; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12819; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12820; NoVLX-NEXT: kmovw %edi, %k1 12821; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12822; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12823; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12824; NoVLX-NEXT: kmovw %k0, %eax 12825; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12826; NoVLX-NEXT: vzeroupper 12827; NoVLX-NEXT: retq 12828entry: 12829 %0 = bitcast <2 x i64> %__a to <2 x i64> 12830 %1 = bitcast <2 x i64> %__b to <2 x i64> 12831 %2 = icmp sge <2 x i64> %0, %1 12832 %3 = bitcast i8 %__u to <8 x i1> 12833 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12834 %4 = and <2 x i1> %2, %extract.i 12835 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12836 %6 = bitcast <8 x i1> %5 to i8 12837 ret i8 %6 12838} 12839 12840define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 12841; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: 12842; VLX: # %bb.0: # %entry 12843; VLX-NEXT: kmovd %edi, %k1 12844; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12845; VLX-NEXT: kmovd %k0, %eax 12846; VLX-NEXT: # kill: def $al killed $al killed $eax 12847; VLX-NEXT: retq 12848; 12849; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: 12850; NoVLX: # %bb.0: # %entry 12851; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12852; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12853; NoVLX-NEXT: kmovw %edi, %k1 12854; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12855; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12856; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12857; NoVLX-NEXT: kmovw 
%k0, %eax 12858; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12859; NoVLX-NEXT: vzeroupper 12860; NoVLX-NEXT: retq 12861entry: 12862 %0 = bitcast <2 x i64> %__a to <2 x i64> 12863 %load = load <2 x i64>, <2 x i64>* %__b 12864 %1 = bitcast <2 x i64> %load to <2 x i64> 12865 %2 = icmp sge <2 x i64> %0, %1 12866 %3 = bitcast i8 %__u to <8 x i1> 12867 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12868 %4 = and <2 x i1> %2, %extract.i 12869 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12870 %6 = bitcast <8 x i1> %5 to i8 12871 ret i8 %6 12872} 12873 12874 12875define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 12876; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12877; VLX: # %bb.0: # %entry 12878; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 12879; VLX-NEXT: kmovd %k0, %eax 12880; VLX-NEXT: # kill: def $al killed $al killed $eax 12881; VLX-NEXT: retq 12882; 12883; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12884; NoVLX: # %bb.0: # %entry 12885; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12886; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 12887; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12888; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12889; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12890; NoVLX-NEXT: kmovw %k0, %eax 12891; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12892; NoVLX-NEXT: vzeroupper 12893; NoVLX-NEXT: retq 12894entry: 12895 %0 = bitcast <2 x i64> %__a to <2 x i64> 12896 %load = load i64, i64* %__b 12897 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12898 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12899 %2 = icmp sge <2 x i64> %0, %1 12900 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12901 %4 = bitcast <8 x i1> %3 to i8 12902 ret i8 %4 12903} 12904 12905define 
zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 12906; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12907; VLX: # %bb.0: # %entry 12908; VLX-NEXT: kmovd %edi, %k1 12909; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 12910; VLX-NEXT: kmovd %k0, %eax 12911; VLX-NEXT: # kill: def $al killed $al killed $eax 12912; VLX-NEXT: retq 12913; 12914; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12915; NoVLX: # %bb.0: # %entry 12916; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12917; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 12918; NoVLX-NEXT: kmovw %edi, %k1 12919; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12920; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12921; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12922; NoVLX-NEXT: kmovw %k0, %eax 12923; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12924; NoVLX-NEXT: vzeroupper 12925; NoVLX-NEXT: retq 12926entry: 12927 %0 = bitcast <2 x i64> %__a to <2 x i64> 12928 %load = load i64, i64* %__b 12929 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12930 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12931 %2 = icmp sge <2 x i64> %0, %1 12932 %3 = bitcast i8 %__u to <8 x i1> 12933 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12934 %4 = and <2 x i1> %extract.i, %2 12935 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12936 %6 = bitcast <8 x i1> %5 to i8 12937 ret i8 %6 12938} 12939 12940 12941define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12942; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: 12943; VLX: # %bb.0: # %entry 12944; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12945; VLX-NEXT: kmovd %k0, %eax 12946; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12947; VLX-NEXT: retq 12948; 12949; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: 12950; 
NoVLX: # %bb.0: # %entry 12951; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12952; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12953; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12954; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12955; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12956; NoVLX-NEXT: kmovw %k0, %eax 12957; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12958; NoVLX-NEXT: vzeroupper 12959; NoVLX-NEXT: retq 12960entry: 12961 %0 = bitcast <2 x i64> %__a to <2 x i64> 12962 %1 = bitcast <2 x i64> %__b to <2 x i64> 12963 %2 = icmp sge <2 x i64> %0, %1 12964 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12965 %4 = bitcast <16 x i1> %3 to i16 12966 ret i16 %4 12967} 12968 12969define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 12970; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: 12971; VLX: # %bb.0: # %entry 12972; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12973; VLX-NEXT: kmovd %k0, %eax 12974; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12975; VLX-NEXT: retq 12976; 12977; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: 12978; NoVLX: # %bb.0: # %entry 12979; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12980; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12981; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12982; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12983; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12984; NoVLX-NEXT: kmovw %k0, %eax 12985; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12986; NoVLX-NEXT: vzeroupper 12987; NoVLX-NEXT: retq 12988entry: 12989 %0 = bitcast <2 x i64> %__a to <2 x i64> 12990 %load = load <2 x i64>, <2 x i64>* %__b 12991 %1 = bitcast <2 x i64> %load to <2 x i64> 12992 %2 = icmp sge <2 x i64> %0, %1 12993 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 
3, i32 2, i32 3, i32 2, i32 3> 12994 %4 = bitcast <16 x i1> %3 to i16 12995 ret i16 %4 12996} 12997 12998define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12999; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: 13000; VLX: # %bb.0: # %entry 13001; VLX-NEXT: kmovd %edi, %k1 13002; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 13003; VLX-NEXT: kmovd %k0, %eax 13004; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13005; VLX-NEXT: retq 13006; 13007; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: 13008; NoVLX: # %bb.0: # %entry 13009; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 13010; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13011; NoVLX-NEXT: kmovw %edi, %k1 13012; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13013; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13014; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13015; NoVLX-NEXT: kmovw %k0, %eax 13016; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13017; NoVLX-NEXT: vzeroupper 13018; NoVLX-NEXT: retq 13019entry: 13020 %0 = bitcast <2 x i64> %__a to <2 x i64> 13021 %1 = bitcast <2 x i64> %__b to <2 x i64> 13022 %2 = icmp sge <2 x i64> %0, %1 13023 %3 = bitcast i8 %__u to <8 x i1> 13024 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13025 %4 = and <2 x i1> %2, %extract.i 13026 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13027 %6 = bitcast <16 x i1> %5 to i16 13028 ret i16 %6 13029} 13030 13031define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 13032; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: 13033; VLX: # %bb.0: # %entry 13034; VLX-NEXT: kmovd %edi, %k1 13035; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 13036; VLX-NEXT: kmovd %k0, %eax 13037; VLX-NEXT: # kill: def $ax 
killed $ax killed $eax 13038; VLX-NEXT: retq 13039; 13040; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: 13041; NoVLX: # %bb.0: # %entry 13042; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13043; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 13044; NoVLX-NEXT: kmovw %edi, %k1 13045; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13046; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13047; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13048; NoVLX-NEXT: kmovw %k0, %eax 13049; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13050; NoVLX-NEXT: vzeroupper 13051; NoVLX-NEXT: retq 13052entry: 13053 %0 = bitcast <2 x i64> %__a to <2 x i64> 13054 %load = load <2 x i64>, <2 x i64>* %__b 13055 %1 = bitcast <2 x i64> %load to <2 x i64> 13056 %2 = icmp sge <2 x i64> %0, %1 13057 %3 = bitcast i8 %__u to <8 x i1> 13058 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13059 %4 = and <2 x i1> %2, %extract.i 13060 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13061 %6 = bitcast <16 x i1> %5 to i16 13062 ret i16 %6 13063} 13064 13065 13066define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 13067; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 13068; VLX: # %bb.0: # %entry 13069; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 13070; VLX-NEXT: kmovd %k0, %eax 13071; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13072; VLX-NEXT: retq 13073; 13074; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 13075; NoVLX: # %bb.0: # %entry 13076; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13077; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 13078; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13079; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13080; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13081; NoVLX-NEXT: kmovw %k0, %eax 13082; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13083; NoVLX-NEXT: 
vzeroupper 13084; NoVLX-NEXT: retq 13085entry: 13086 %0 = bitcast <2 x i64> %__a to <2 x i64> 13087 %load = load i64, i64* %__b 13088 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13089 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13090 %2 = icmp sge <2 x i64> %0, %1 13091 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13092 %4 = bitcast <16 x i1> %3 to i16 13093 ret i16 %4 13094} 13095 13096define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 13097; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 13098; VLX: # %bb.0: # %entry 13099; VLX-NEXT: kmovd %edi, %k1 13100; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 13101; VLX-NEXT: kmovd %k0, %eax 13102; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13103; VLX-NEXT: retq 13104; 13105; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 13106; NoVLX: # %bb.0: # %entry 13107; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13108; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 13109; NoVLX-NEXT: kmovw %edi, %k1 13110; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13111; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13112; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13113; NoVLX-NEXT: kmovw %k0, %eax 13114; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13115; NoVLX-NEXT: vzeroupper 13116; NoVLX-NEXT: retq 13117entry: 13118 %0 = bitcast <2 x i64> %__a to <2 x i64> 13119 %load = load i64, i64* %__b 13120 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13121 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13122 %2 = icmp sge <2 x i64> %0, %1 13123 %3 = bitcast i8 %__u to <8 x i1> 13124 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13125 %4 = and <2 x i1> %extract.i, %2 13126 %5 = shufflevector <2 x i1> 
%4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13127 %6 = bitcast <16 x i1> %5 to i16 13128 ret i16 %6 13129} 13130 13131 13132define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 13133; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: 13134; VLX: # %bb.0: # %entry 13135; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 13136; VLX-NEXT: kmovd %k0, %eax 13137; VLX-NEXT: retq 13138; 13139; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: 13140; NoVLX: # %bb.0: # %entry 13141; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 13142; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13143; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13144; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13145; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13146; NoVLX-NEXT: kmovw %k0, %eax 13147; NoVLX-NEXT: vzeroupper 13148; NoVLX-NEXT: retq 13149entry: 13150 %0 = bitcast <2 x i64> %__a to <2 x i64> 13151 %1 = bitcast <2 x i64> %__b to <2 x i64> 13152 %2 = icmp sge <2 x i64> %0, %1 13153 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13154 %4 = bitcast <32 x i1> %3 to i32 13155 ret i32 %4 13156} 13157 13158define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 13159; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: 13160; VLX: # %bb.0: # %entry 13161; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 13162; VLX-NEXT: kmovd %k0, %eax 13163; VLX-NEXT: retq 13164; 13165; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: 13166; NoVLX: # %bb.0: # %entry 13167; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13168; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 13169; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, 
%k0 13170; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13171; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13172; NoVLX-NEXT: kmovw %k0, %eax 13173; NoVLX-NEXT: vzeroupper 13174; NoVLX-NEXT: retq 13175entry: 13176 %0 = bitcast <2 x i64> %__a to <2 x i64> 13177 %load = load <2 x i64>, <2 x i64>* %__b 13178 %1 = bitcast <2 x i64> %load to <2 x i64> 13179 %2 = icmp sge <2 x i64> %0, %1 13180 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13181 %4 = bitcast <32 x i1> %3 to i32 13182 ret i32 %4 13183} 13184 13185define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 13186; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: 13187; VLX: # %bb.0: # %entry 13188; VLX-NEXT: kmovd %edi, %k1 13189; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 13190; VLX-NEXT: kmovd %k0, %eax 13191; VLX-NEXT: retq 13192; 13193; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: 13194; NoVLX: # %bb.0: # %entry 13195; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 13196; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13197; NoVLX-NEXT: kmovw %edi, %k1 13198; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13199; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13200; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13201; NoVLX-NEXT: kmovw %k0, %eax 13202; NoVLX-NEXT: vzeroupper 13203; NoVLX-NEXT: retq 13204entry: 13205 %0 = bitcast <2 x i64> %__a to <2 x i64> 13206 %1 = bitcast <2 x i64> %__b to <2 x i64> 13207 %2 = icmp sge <2 x i64> %0, %1 13208 %3 = bitcast i8 %__u to <8 x i1> 13209 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13210 %4 = and <2 x i1> %2, %extract.i 13211 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13212 %6 = bitcast <32 x i1> %5 to i32 13213 ret i32 %6 13214} 13215 13216define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 13217; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: 13218; VLX: # %bb.0: # %entry 13219; VLX-NEXT: kmovd %edi, %k1 13220; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 13221; VLX-NEXT: kmovd %k0, %eax 13222; VLX-NEXT: retq 13223; 13224; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: 13225; NoVLX: # %bb.0: # %entry 13226; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13227; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 13228; NoVLX-NEXT: kmovw %edi, %k1 13229; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13230; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13231; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13232; NoVLX-NEXT: kmovw %k0, %eax 13233; NoVLX-NEXT: vzeroupper 13234; NoVLX-NEXT: retq 13235entry: 13236 %0 = bitcast <2 x i64> %__a to <2 x i64> 13237 %load = load <2 x i64>, <2 x i64>* %__b 13238 %1 = bitcast <2 x i64> %load to <2 x i64> 13239 %2 = icmp sge <2 x i64> %0, %1 13240 %3 = bitcast i8 %__u to <8 x i1> 13241 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13242 %4 = and <2 x i1> %2, %extract.i 13243 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13244 %6 = bitcast <32 x i1> %5 to i32 13245 ret i32 %6 13246} 13247 13248 13249define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 13250; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: 13251; VLX: # 
%bb.0: # %entry 13252; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 13253; VLX-NEXT: kmovd %k0, %eax 13254; VLX-NEXT: retq 13255; 13256; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: 13257; NoVLX: # %bb.0: # %entry 13258; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13259; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 13260; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13261; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13262; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13263; NoVLX-NEXT: kmovw %k0, %eax 13264; NoVLX-NEXT: vzeroupper 13265; NoVLX-NEXT: retq 13266entry: 13267 %0 = bitcast <2 x i64> %__a to <2 x i64> 13268 %load = load i64, i64* %__b 13269 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13270 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13271 %2 = icmp sge <2 x i64> %0, %1 13272 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13273 %4 = bitcast <32 x i1> %3 to i32 13274 ret i32 %4 13275} 13276 13277define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 13278; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: 13279; VLX: # %bb.0: # %entry 13280; VLX-NEXT: kmovd %edi, %k1 13281; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 13282; VLX-NEXT: kmovd %k0, %eax 13283; VLX-NEXT: retq 13284; 13285; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: 13286; NoVLX: # %bb.0: # %entry 13287; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13288; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 13289; NoVLX-NEXT: kmovw %edi, %k1 13290; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13291; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13292; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13293; NoVLX-NEXT: kmovw %k0, %eax 13294; NoVLX-NEXT: 
vzeroupper 13295; NoVLX-NEXT: retq 13296entry: 13297 %0 = bitcast <2 x i64> %__a to <2 x i64> 13298 %load = load i64, i64* %__b 13299 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13300 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13301 %2 = icmp sge <2 x i64> %0, %1 13302 %3 = bitcast i8 %__u to <8 x i1> 13303 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13304 %4 = and <2 x i1> %extract.i, %2 13305 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13306 %6 = bitcast <32 x i1> %5 to i32 13307 ret i32 %6 13308} 13309 13310 13311define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 13312; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: 13313; VLX: # %bb.0: # %entry 13314; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 13315; VLX-NEXT: kmovq %k0, %rax 13316; VLX-NEXT: retq 13317; 13318; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: 13319; NoVLX: # %bb.0: # %entry 13320; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 13321; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13322; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13323; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13324; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13325; NoVLX-NEXT: kmovw %k0, %eax 13326; NoVLX-NEXT: movzwl %ax, %eax 13327; NoVLX-NEXT: vzeroupper 13328; NoVLX-NEXT: retq 13329entry: 13330 %0 = bitcast <2 x i64> %__a to <2 x i64> 13331 %1 = bitcast <2 x i64> %__b to <2 x i64> 13332 %2 = icmp sge <2 x i64> %0, %1 13333 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13334 %4 = bitcast <64 x i1> %3 to i64 13335 ret i64 %4 13336} 13337 13338define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 13339; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: 13340; VLX: # %bb.0: # %entry 13341; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 13342; VLX-NEXT: kmovq %k0, %rax 13343; VLX-NEXT: retq 13344; 13345; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: 13346; NoVLX: # %bb.0: # %entry 13347; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13348; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 13349; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13350; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13351; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13352; NoVLX-NEXT: kmovw %k0, %eax 13353; NoVLX-NEXT: movzwl %ax, %eax 13354; NoVLX-NEXT: vzeroupper 13355; NoVLX-NEXT: retq 13356entry: 13357 %0 = bitcast <2 x i64> %__a to <2 x i64> 13358 %load = load <2 x i64>, <2 x i64>* %__b 13359 %1 = bitcast <2 x i64> %load to <2 x i64> 13360 %2 = icmp sge <2 x i64> %0, %1 13361 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13362 %4 = bitcast <64 x i1> %3 to i64 13363 ret i64 %4 13364} 13365 13366define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) 
local_unnamed_addr { 13367; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: 13368; VLX: # %bb.0: # %entry 13369; VLX-NEXT: kmovd %edi, %k1 13370; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 13371; VLX-NEXT: kmovq %k0, %rax 13372; VLX-NEXT: retq 13373; 13374; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: 13375; NoVLX: # %bb.0: # %entry 13376; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 13377; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13378; NoVLX-NEXT: kmovw %edi, %k1 13379; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13380; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13381; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13382; NoVLX-NEXT: kmovw %k0, %eax 13383; NoVLX-NEXT: movzwl %ax, %eax 13384; NoVLX-NEXT: vzeroupper 13385; NoVLX-NEXT: retq 13386entry: 13387 %0 = bitcast <2 x i64> %__a to <2 x i64> 13388 %1 = bitcast <2 x i64> %__b to <2 x i64> 13389 %2 = icmp sge <2 x i64> %0, %1 13390 %3 = bitcast i8 %__u to <8 x i1> 13391 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13392 %4 = and <2 x i1> %2, %extract.i 13393 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13394 %6 = bitcast <64 x i1> %5 to i64 13395 ret i64 %6 13396} 13397 13398define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 13399; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: 13400; VLX: # %bb.0: # %entry 13401; VLX-NEXT: kmovd %edi, %k1 13402; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 13403; VLX-NEXT: kmovq 
%k0, %rax 13404; VLX-NEXT: retq 13405; 13406; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: 13407; NoVLX: # %bb.0: # %entry 13408; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13409; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 13410; NoVLX-NEXT: kmovw %edi, %k1 13411; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13412; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13413; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13414; NoVLX-NEXT: kmovw %k0, %eax 13415; NoVLX-NEXT: movzwl %ax, %eax 13416; NoVLX-NEXT: vzeroupper 13417; NoVLX-NEXT: retq 13418entry: 13419 %0 = bitcast <2 x i64> %__a to <2 x i64> 13420 %load = load <2 x i64>, <2 x i64>* %__b 13421 %1 = bitcast <2 x i64> %load to <2 x i64> 13422 %2 = icmp sge <2 x i64> %0, %1 13423 %3 = bitcast i8 %__u to <8 x i1> 13424 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13425 %4 = and <2 x i1> %2, %extract.i 13426 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13427 %6 = bitcast <64 x i1> %5 to i64 13428 ret i64 %6 13429} 13430 13431 13432define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 13433; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: 13434; VLX: # %bb.0: # %entry 13435; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 13436; VLX-NEXT: kmovq %k0, %rax 13437; VLX-NEXT: retq 13438; 13439; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: 13440; NoVLX: # %bb.0: # %entry 13441; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13442; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 13443; 
NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13444; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13445; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13446; NoVLX-NEXT: kmovw %k0, %eax 13447; NoVLX-NEXT: movzwl %ax, %eax 13448; NoVLX-NEXT: vzeroupper 13449; NoVLX-NEXT: retq 13450entry: 13451 %0 = bitcast <2 x i64> %__a to <2 x i64> 13452 %load = load i64, i64* %__b 13453 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13454 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13455 %2 = icmp sge <2 x i64> %0, %1 13456 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13457 %4 = bitcast <64 x i1> %3 to i64 13458 ret i64 %4 13459} 13460 13461define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 13462; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: 13463; VLX: # %bb.0: # %entry 13464; VLX-NEXT: kmovd %edi, %k1 13465; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 13466; VLX-NEXT: kmovq %k0, %rax 13467; VLX-NEXT: retq 13468; 13469; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: 13470; NoVLX: # %bb.0: # %entry 13471; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 13472; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 13473; NoVLX-NEXT: kmovw %edi, %k1 13474; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13475; NoVLX-NEXT: kshiftlw $14, %k0, %k0 13476; NoVLX-NEXT: kshiftrw $14, %k0, %k0 13477; NoVLX-NEXT: kmovw %k0, %eax 13478; NoVLX-NEXT: movzwl %ax, %eax 13479; NoVLX-NEXT: vzeroupper 13480; NoVLX-NEXT: retq 
13481entry: 13482 %0 = bitcast <2 x i64> %__a to <2 x i64> 13483 %load = load i64, i64* %__b 13484 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 13485 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 13486 %2 = icmp sge <2 x i64> %0, %1 13487 %3 = bitcast i8 %__u to <8 x i1> 13488 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13489 %4 = and <2 x i1> %extract.i, %2 13490 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13491 %6 = bitcast <64 x i1> %5 to i64 13492 ret i64 %6 13493} 13494 13495 13496define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13497; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: 13498; VLX: # %bb.0: # %entry 13499; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 13500; VLX-NEXT: kmovd %k0, %eax 13501; VLX-NEXT: # kill: def $al killed $al killed $eax 13502; VLX-NEXT: vzeroupper 13503; VLX-NEXT: retq 13504; 13505; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: 13506; NoVLX: # %bb.0: # %entry 13507; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13508; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13509; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13510; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13511; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13512; NoVLX-NEXT: kmovw %k0, %eax 13513; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13514; NoVLX-NEXT: vzeroupper 13515; NoVLX-NEXT: retq 13516entry: 13517 %0 = bitcast <4 x i64> %__a to <4 x i64> 13518 %1 = bitcast <4 x i64> %__b to <4 x 
i64> 13519 %2 = icmp sge <4 x i64> %0, %1 13520 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13521 %4 = bitcast <8 x i1> %3 to i8 13522 ret i8 %4 13523} 13524 13525define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13526; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: 13527; VLX: # %bb.0: # %entry 13528; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 13529; VLX-NEXT: kmovd %k0, %eax 13530; VLX-NEXT: # kill: def $al killed $al killed $eax 13531; VLX-NEXT: vzeroupper 13532; VLX-NEXT: retq 13533; 13534; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: 13535; NoVLX: # %bb.0: # %entry 13536; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13537; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 13538; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13539; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13540; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13541; NoVLX-NEXT: kmovw %k0, %eax 13542; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13543; NoVLX-NEXT: vzeroupper 13544; NoVLX-NEXT: retq 13545entry: 13546 %0 = bitcast <4 x i64> %__a to <4 x i64> 13547 %load = load <4 x i64>, <4 x i64>* %__b 13548 %1 = bitcast <4 x i64> %load to <4 x i64> 13549 %2 = icmp sge <4 x i64> %0, %1 13550 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13551 %4 = bitcast <8 x i1> %3 to i8 13552 ret i8 %4 13553} 13554 13555define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13556; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: 13557; VLX: # %bb.0: # %entry 13558; VLX-NEXT: kmovd %edi, %k1 13559; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} 13560; VLX-NEXT: kmovd %k0, %eax 13561; VLX-NEXT: # kill: def $al killed $al killed $eax 13562; VLX-NEXT: vzeroupper 13563; VLX-NEXT: retq 13564; 13565; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: 13566; NoVLX: # 
%bb.0: # %entry 13567; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13568; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13569; NoVLX-NEXT: kmovw %edi, %k1 13570; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13571; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13572; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13573; NoVLX-NEXT: kmovw %k0, %eax 13574; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13575; NoVLX-NEXT: vzeroupper 13576; NoVLX-NEXT: retq 13577entry: 13578 %0 = bitcast <4 x i64> %__a to <4 x i64> 13579 %1 = bitcast <4 x i64> %__b to <4 x i64> 13580 %2 = icmp sge <4 x i64> %0, %1 13581 %3 = bitcast i8 %__u to <8 x i1> 13582 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13583 %4 = and <4 x i1> %2, %extract.i 13584 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13585 %6 = bitcast <8 x i1> %5 to i8 13586 ret i8 %6 13587} 13588 13589define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13590; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: 13591; VLX: # %bb.0: # %entry 13592; VLX-NEXT: kmovd %edi, %k1 13593; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} 13594; VLX-NEXT: kmovd %k0, %eax 13595; VLX-NEXT: # kill: def $al killed $al killed $eax 13596; VLX-NEXT: vzeroupper 13597; VLX-NEXT: retq 13598; 13599; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: 13600; NoVLX: # %bb.0: # %entry 13601; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13602; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 13603; NoVLX-NEXT: kmovw %edi, %k1 13604; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13605; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13606; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13607; NoVLX-NEXT: kmovw %k0, %eax 13608; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13609; NoVLX-NEXT: vzeroupper 13610; NoVLX-NEXT: retq 13611entry: 13612 %0 = bitcast <4 x i64> %__a to <4 x 
i64> 13613 %load = load <4 x i64>, <4 x i64>* %__b 13614 %1 = bitcast <4 x i64> %load to <4 x i64> 13615 %2 = icmp sge <4 x i64> %0, %1 13616 %3 = bitcast i8 %__u to <8 x i1> 13617 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13618 %4 = and <4 x i1> %2, %extract.i 13619 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13620 %6 = bitcast <8 x i1> %5 to i8 13621 ret i8 %6 13622} 13623 13624 13625define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 13626; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: 13627; VLX: # %bb.0: # %entry 13628; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 13629; VLX-NEXT: kmovd %k0, %eax 13630; VLX-NEXT: # kill: def $al killed $al killed $eax 13631; VLX-NEXT: vzeroupper 13632; VLX-NEXT: retq 13633; 13634; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: 13635; NoVLX: # %bb.0: # %entry 13636; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13637; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 13638; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13639; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13640; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13641; NoVLX-NEXT: kmovw %k0, %eax 13642; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13643; NoVLX-NEXT: vzeroupper 13644; NoVLX-NEXT: retq 13645entry: 13646 %0 = bitcast <4 x i64> %__a to <4 x i64> 13647 %load = load i64, i64* %__b 13648 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 13649 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 13650 %2 = icmp sge <4 x i64> %0, %1 13651 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13652 %4 = bitcast <8 x i1> %3 to i8 13653 ret i8 %4 13654} 13655 13656define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 
13657; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: 13658; VLX: # %bb.0: # %entry 13659; VLX-NEXT: kmovd %edi, %k1 13660; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} 13661; VLX-NEXT: kmovd %k0, %eax 13662; VLX-NEXT: # kill: def $al killed $al killed $eax 13663; VLX-NEXT: vzeroupper 13664; VLX-NEXT: retq 13665; 13666; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: 13667; NoVLX: # %bb.0: # %entry 13668; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13669; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 13670; NoVLX-NEXT: kmovw %edi, %k1 13671; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13672; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13673; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13674; NoVLX-NEXT: kmovw %k0, %eax 13675; NoVLX-NEXT: # kill: def $al killed $al killed $eax 13676; NoVLX-NEXT: vzeroupper 13677; NoVLX-NEXT: retq 13678entry: 13679 %0 = bitcast <4 x i64> %__a to <4 x i64> 13680 %load = load i64, i64* %__b 13681 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 13682 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 13683 %2 = icmp sge <4 x i64> %0, %1 13684 %3 = bitcast i8 %__u to <8 x i1> 13685 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13686 %4 = and <4 x i1> %extract.i, %2 13687 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 13688 %6 = bitcast <8 x i1> %5 to i8 13689 ret i8 %6 13690} 13691 13692 13693define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13694; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: 13695; VLX: # %bb.0: # %entry 13696; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 13697; VLX-NEXT: kmovd %k0, %eax 13698; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13699; VLX-NEXT: vzeroupper 13700; VLX-NEXT: retq 13701; 13702; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: 13703; NoVLX: # %bb.0: # %entry 13704; 
NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13705; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13706; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13707; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13708; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13709; NoVLX-NEXT: kmovw %k0, %eax 13710; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13711; NoVLX-NEXT: vzeroupper 13712; NoVLX-NEXT: retq 13713entry: 13714 %0 = bitcast <4 x i64> %__a to <4 x i64> 13715 %1 = bitcast <4 x i64> %__b to <4 x i64> 13716 %2 = icmp sge <4 x i64> %0, %1 13717 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13718 %4 = bitcast <16 x i1> %3 to i16 13719 ret i16 %4 13720} 13721 13722define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13723; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: 13724; VLX: # %bb.0: # %entry 13725; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 13726; VLX-NEXT: kmovd %k0, %eax 13727; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13728; VLX-NEXT: vzeroupper 13729; VLX-NEXT: retq 13730; 13731; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: 13732; NoVLX: # %bb.0: # %entry 13733; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13734; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 13735; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13736; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13737; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13738; NoVLX-NEXT: kmovw %k0, %eax 13739; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13740; NoVLX-NEXT: vzeroupper 13741; NoVLX-NEXT: retq 13742entry: 13743 %0 = bitcast <4 x i64> %__a to <4 x i64> 13744 %load = load <4 x i64>, <4 x i64>* %__b 13745 %1 = bitcast <4 x i64> %load to <4 x i64> 13746 %2 = icmp sge <4 x i64> %0, %1 13747 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7> 13748 %4 = bitcast <16 x i1> %3 to i16 13749 ret i16 %4 13750} 13751 13752define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13753; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: 13754; VLX: # %bb.0: # %entry 13755; VLX-NEXT: kmovd %edi, %k1 13756; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} 13757; VLX-NEXT: kmovd %k0, %eax 13758; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13759; VLX-NEXT: vzeroupper 13760; VLX-NEXT: retq 13761; 13762; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: 13763; NoVLX: # %bb.0: # %entry 13764; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13765; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13766; NoVLX-NEXT: kmovw %edi, %k1 13767; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13768; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13769; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13770; NoVLX-NEXT: kmovw %k0, %eax 13771; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13772; NoVLX-NEXT: vzeroupper 13773; NoVLX-NEXT: retq 13774entry: 13775 %0 = bitcast <4 x i64> %__a to <4 x i64> 13776 %1 = bitcast <4 x i64> %__b to <4 x i64> 13777 %2 = icmp sge <4 x i64> %0, %1 13778 %3 = bitcast i8 %__u to <8 x i1> 13779 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13780 %4 = and <4 x i1> %2, %extract.i 13781 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13782 %6 = bitcast <16 x i1> %5 to i16 13783 ret i16 %6 13784} 13785 13786define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13787; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: 13788; VLX: # %bb.0: # %entry 13789; VLX-NEXT: kmovd %edi, %k1 13790; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} 13791; VLX-NEXT: kmovd %k0, 
%eax 13792; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13793; VLX-NEXT: vzeroupper 13794; VLX-NEXT: retq 13795; 13796; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: 13797; NoVLX: # %bb.0: # %entry 13798; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13799; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 13800; NoVLX-NEXT: kmovw %edi, %k1 13801; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13802; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13803; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13804; NoVLX-NEXT: kmovw %k0, %eax 13805; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13806; NoVLX-NEXT: vzeroupper 13807; NoVLX-NEXT: retq 13808entry: 13809 %0 = bitcast <4 x i64> %__a to <4 x i64> 13810 %load = load <4 x i64>, <4 x i64>* %__b 13811 %1 = bitcast <4 x i64> %load to <4 x i64> 13812 %2 = icmp sge <4 x i64> %0, %1 13813 %3 = bitcast i8 %__u to <8 x i1> 13814 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13815 %4 = and <4 x i1> %2, %extract.i 13816 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13817 %6 = bitcast <16 x i1> %5 to i16 13818 ret i16 %6 13819} 13820 13821 13822define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 13823; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: 13824; VLX: # %bb.0: # %entry 13825; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 13826; VLX-NEXT: kmovd %k0, %eax 13827; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13828; VLX-NEXT: vzeroupper 13829; VLX-NEXT: retq 13830; 13831; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: 13832; NoVLX: # %bb.0: # %entry 13833; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13834; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 13835; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13836; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13837; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13838; 
NoVLX-NEXT: kmovw %k0, %eax 13839; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13840; NoVLX-NEXT: vzeroupper 13841; NoVLX-NEXT: retq 13842entry: 13843 %0 = bitcast <4 x i64> %__a to <4 x i64> 13844 %load = load i64, i64* %__b 13845 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 13846 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 13847 %2 = icmp sge <4 x i64> %0, %1 13848 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13849 %4 = bitcast <16 x i1> %3 to i16 13850 ret i16 %4 13851} 13852 13853define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 13854; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: 13855; VLX: # %bb.0: # %entry 13856; VLX-NEXT: kmovd %edi, %k1 13857; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} 13858; VLX-NEXT: kmovd %k0, %eax 13859; VLX-NEXT: # kill: def $ax killed $ax killed $eax 13860; VLX-NEXT: vzeroupper 13861; VLX-NEXT: retq 13862; 13863; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: 13864; NoVLX: # %bb.0: # %entry 13865; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13866; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 13867; NoVLX-NEXT: kmovw %edi, %k1 13868; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13869; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13870; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13871; NoVLX-NEXT: kmovw %k0, %eax 13872; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 13873; NoVLX-NEXT: vzeroupper 13874; NoVLX-NEXT: retq 13875entry: 13876 %0 = bitcast <4 x i64> %__a to <4 x i64> 13877 %load = load i64, i64* %__b 13878 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 13879 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 13880 %2 = icmp sge <4 x i64> %0, %1 13881 %3 = bitcast i8 %__u to <8 x i1> 
13882 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13883 %4 = and <4 x i1> %extract.i, %2 13884 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13885 %6 = bitcast <16 x i1> %5 to i16 13886 ret i16 %6 13887} 13888 13889 13890define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13891; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: 13892; VLX: # %bb.0: # %entry 13893; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 13894; VLX-NEXT: kmovd %k0, %eax 13895; VLX-NEXT: vzeroupper 13896; VLX-NEXT: retq 13897; 13898; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: 13899; NoVLX: # %bb.0: # %entry 13900; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13901; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13902; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13903; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13904; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13905; NoVLX-NEXT: kmovw %k0, %eax 13906; NoVLX-NEXT: vzeroupper 13907; NoVLX-NEXT: retq 13908entry: 13909 %0 = bitcast <4 x i64> %__a to <4 x i64> 13910 %1 = bitcast <4 x i64> %__b to <4 x i64> 13911 %2 = icmp sge <4 x i64> %0, %1 13912 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13913 %4 = bitcast <32 x i1> %3 to i32 13914 ret i32 %4 13915} 13916 13917define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13918; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: 13919; VLX: # %bb.0: # %entry 13920; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 13921; VLX-NEXT: kmovd %k0, %eax 13922; VLX-NEXT: vzeroupper 13923; VLX-NEXT: retq 13924; 
13925; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: 13926; NoVLX: # %bb.0: # %entry 13927; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13928; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 13929; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 13930; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13931; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13932; NoVLX-NEXT: kmovw %k0, %eax 13933; NoVLX-NEXT: vzeroupper 13934; NoVLX-NEXT: retq 13935entry: 13936 %0 = bitcast <4 x i64> %__a to <4 x i64> 13937 %load = load <4 x i64>, <4 x i64>* %__b 13938 %1 = bitcast <4 x i64> %load to <4 x i64> 13939 %2 = icmp sge <4 x i64> %0, %1 13940 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13941 %4 = bitcast <32 x i1> %3 to i32 13942 ret i32 %4 13943} 13944 13945define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 13946; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: 13947; VLX: # %bb.0: # %entry 13948; VLX-NEXT: kmovd %edi, %k1 13949; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} 13950; VLX-NEXT: kmovd %k0, %eax 13951; VLX-NEXT: vzeroupper 13952; VLX-NEXT: retq 13953; 13954; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: 13955; NoVLX: # %bb.0: # %entry 13956; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 13957; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13958; NoVLX-NEXT: kmovw %edi, %k1 13959; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13960; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13961; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13962; NoVLX-NEXT: kmovw %k0, %eax 13963; NoVLX-NEXT: vzeroupper 13964; NoVLX-NEXT: retq 13965entry: 13966 %0 = bitcast <4 x i64> %__a to <4 x i64> 13967 %1 = bitcast <4 x i64> %__b to <4 x i64> 13968 %2 = icmp sge <4 x i64> %0, %1 13969 %3 = bitcast i8 
%__u to <8 x i1> 13970 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 13971 %4 = and <4 x i1> %2, %extract.i 13972 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 13973 %6 = bitcast <32 x i1> %5 to i32 13974 ret i32 %6 13975} 13976 13977define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 13978; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: 13979; VLX: # %bb.0: # %entry 13980; VLX-NEXT: kmovd %edi, %k1 13981; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} 13982; VLX-NEXT: kmovd %k0, %eax 13983; VLX-NEXT: vzeroupper 13984; VLX-NEXT: retq 13985; 13986; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: 13987; NoVLX: # %bb.0: # %entry 13988; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 13989; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 13990; NoVLX-NEXT: kmovw %edi, %k1 13991; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 13992; NoVLX-NEXT: kshiftlw $12, %k0, %k0 13993; NoVLX-NEXT: kshiftrw $12, %k0, %k0 13994; NoVLX-NEXT: kmovw %k0, %eax 13995; NoVLX-NEXT: vzeroupper 13996; NoVLX-NEXT: retq 13997entry: 13998 %0 = bitcast <4 x i64> %__a to <4 x i64> 13999 %load = load <4 x i64>, <4 x i64>* %__b 14000 %1 = bitcast <4 x i64> %load to <4 x i64> 14001 %2 = icmp sge <4 x i64> %0, %1 14002 %3 = bitcast i8 %__u to <8 x i1> 14003 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14004 %4 = and <4 x i1> %2, %extract.i 14005 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14006 %6 = bitcast <32 x i1> %5 to i32 14007 ret i32 %6 14008} 14009 14010 14011define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 14012; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: 14013; VLX: # %bb.0: # %entry 14014; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 14015; VLX-NEXT: kmovd %k0, %eax 14016; VLX-NEXT: vzeroupper 14017; VLX-NEXT: retq 14018; 14019; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: 14020; NoVLX: # %bb.0: # %entry 14021; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14022; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 14023; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14024; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14025; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14026; NoVLX-NEXT: kmovw %k0, %eax 14027; NoVLX-NEXT: vzeroupper 14028; NoVLX-NEXT: retq 14029entry: 14030 %0 = bitcast <4 x i64> %__a to <4 x i64> 14031 %load = load i64, i64* %__b 14032 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 14033 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 14034 %2 = icmp sge <4 x i64> %0, %1 14035 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14036 %4 = bitcast <32 x i1> %3 to i32 14037 ret i32 %4 14038} 14039 14040define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 14041; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: 14042; VLX: # %bb.0: # %entry 14043; VLX-NEXT: kmovd %edi, %k1 14044; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} 14045; VLX-NEXT: kmovd %k0, %eax 14046; VLX-NEXT: vzeroupper 14047; VLX-NEXT: retq 14048; 14049; NoVLX-LABEL: 
test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: 14050; NoVLX: # %bb.0: # %entry 14051; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14052; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 14053; NoVLX-NEXT: kmovw %edi, %k1 14054; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14055; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14056; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14057; NoVLX-NEXT: kmovw %k0, %eax 14058; NoVLX-NEXT: vzeroupper 14059; NoVLX-NEXT: retq 14060entry: 14061 %0 = bitcast <4 x i64> %__a to <4 x i64> 14062 %load = load i64, i64* %__b 14063 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 14064 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 14065 %2 = icmp sge <4 x i64> %0, %1 14066 %3 = bitcast i8 %__u to <8 x i1> 14067 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14068 %4 = and <4 x i1> %extract.i, %2 14069 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14070 %6 = bitcast <32 x i1> %5 to i32 14071 ret i32 %6 14072} 14073 14074 14075define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 14076; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: 14077; VLX: # %bb.0: # %entry 14078; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 14079; VLX-NEXT: kmovq %k0, %rax 14080; VLX-NEXT: vzeroupper 14081; VLX-NEXT: retq 14082; 14083; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: 14084; NoVLX: # %bb.0: # %entry 14085; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 14086; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14087; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14088; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14089; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14090; NoVLX-NEXT: kmovw %k0, %eax 14091; NoVLX-NEXT: 
movzwl %ax, %eax 14092; NoVLX-NEXT: vzeroupper 14093; NoVLX-NEXT: retq 14094entry: 14095 %0 = bitcast <4 x i64> %__a to <4 x i64> 14096 %1 = bitcast <4 x i64> %__b to <4 x i64> 14097 %2 = icmp sge <4 x i64> %0, %1 14098 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14099 %4 = bitcast <64 x i1> %3 to i64 14100 ret i64 %4 14101} 14102 14103define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 14104; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: 14105; VLX: # %bb.0: # %entry 14106; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 14107; VLX-NEXT: kmovq %k0, %rax 14108; VLX-NEXT: vzeroupper 14109; VLX-NEXT: retq 14110; 14111; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: 14112; NoVLX: # %bb.0: # %entry 14113; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14114; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 14115; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14116; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14117; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14118; NoVLX-NEXT: kmovw %k0, %eax 14119; NoVLX-NEXT: movzwl %ax, %eax 14120; NoVLX-NEXT: vzeroupper 14121; NoVLX-NEXT: retq 14122entry: 14123 %0 = bitcast <4 x i64> %__a to <4 x i64> 14124 %load = load <4 x i64>, <4 x i64>* %__b 14125 %1 = bitcast <4 x i64> %load to <4 x i64> 14126 %2 = icmp sge <4 x i64> %0, %1 14127 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14128 %4 = bitcast <64 x i1> %3 to i64 14129 ret i64 %4 14130} 14131 14132define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 14133; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: 14134; VLX: # %bb.0: # %entry 14135; VLX-NEXT: kmovd %edi, %k1 14136; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} 14137; VLX-NEXT: kmovq %k0, %rax 14138; VLX-NEXT: vzeroupper 14139; VLX-NEXT: retq 14140; 14141; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: 14142; NoVLX: # %bb.0: # %entry 14143; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 14144; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14145; NoVLX-NEXT: kmovw %edi, %k1 14146; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14147; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14148; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14149; NoVLX-NEXT: kmovw %k0, %eax 14150; NoVLX-NEXT: movzwl %ax, %eax 14151; NoVLX-NEXT: vzeroupper 14152; NoVLX-NEXT: retq 14153entry: 14154 %0 = bitcast <4 x i64> %__a to <4 x i64> 14155 %1 = bitcast <4 x i64> %__b to <4 x i64> 14156 %2 = icmp sge <4 x i64> %0, %1 14157 %3 = bitcast i8 %__u to <8 x i1> 14158 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14159 %4 = and <4 x i1> %2, %extract.i 14160 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14161 %6 = bitcast <64 x i1> %5 to i64 14162 ret i64 %6 14163} 14164 14165define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 14166; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: 14167; VLX: # %bb.0: # %entry 14168; VLX-NEXT: kmovd %edi, %k1 14169; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} 14170; VLX-NEXT: kmovq %k0, %rax 14171; VLX-NEXT: vzeroupper 14172; VLX-NEXT: retq 14173; 14174; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: 14175; NoVLX: # %bb.0: # %entry 14176; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14177; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 14178; NoVLX-NEXT: kmovw %edi, %k1 14179; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14180; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14181; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14182; NoVLX-NEXT: kmovw %k0, %eax 14183; NoVLX-NEXT: movzwl %ax, %eax 14184; NoVLX-NEXT: vzeroupper 14185; NoVLX-NEXT: retq 14186entry: 14187 %0 = bitcast <4 x i64> %__a to <4 x i64> 14188 %load = load <4 x i64>, <4 x i64>* %__b 14189 %1 = bitcast <4 x i64> %load to <4 x i64> 14190 %2 = icmp sge <4 x i64> %0, %1 14191 %3 = bitcast i8 %__u to <8 x i1> 14192 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14193 %4 = and <4 x i1> %2, %extract.i 14194 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14195 %6 = bitcast 
<64 x i1> %5 to i64 14196 ret i64 %6 14197} 14198 14199 14200define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 14201; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: 14202; VLX: # %bb.0: # %entry 14203; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 14204; VLX-NEXT: kmovq %k0, %rax 14205; VLX-NEXT: vzeroupper 14206; VLX-NEXT: retq 14207; 14208; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: 14209; NoVLX: # %bb.0: # %entry 14210; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14211; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 14212; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14213; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14214; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14215; NoVLX-NEXT: kmovw %k0, %eax 14216; NoVLX-NEXT: movzwl %ax, %eax 14217; NoVLX-NEXT: vzeroupper 14218; NoVLX-NEXT: retq 14219entry: 14220 %0 = bitcast <4 x i64> %__a to <4 x i64> 14221 %load = load i64, i64* %__b 14222 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 14223 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 14224 %2 = icmp sge <4 x i64> %0, %1 14225 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14226 %4 = bitcast <64 x i1> %3 to i64 14227 ret i64 %4 14228} 14229 14230define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 14231; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: 14232; VLX: # %bb.0: # %entry 14233; VLX-NEXT: kmovd %edi, %k1 14234; 
VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} 14235; VLX-NEXT: kmovq %k0, %rax 14236; VLX-NEXT: vzeroupper 14237; VLX-NEXT: retq 14238; 14239; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: 14240; NoVLX: # %bb.0: # %entry 14241; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 14242; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 14243; NoVLX-NEXT: kmovw %edi, %k1 14244; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14245; NoVLX-NEXT: kshiftlw $12, %k0, %k0 14246; NoVLX-NEXT: kshiftrw $12, %k0, %k0 14247; NoVLX-NEXT: kmovw %k0, %eax 14248; NoVLX-NEXT: movzwl %ax, %eax 14249; NoVLX-NEXT: vzeroupper 14250; NoVLX-NEXT: retq 14251entry: 14252 %0 = bitcast <4 x i64> %__a to <4 x i64> 14253 %load = load i64, i64* %__b 14254 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 14255 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 14256 %2 = icmp sge <4 x i64> %0, %1 14257 %3 = bitcast i8 %__u to <8 x i1> 14258 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 14259 %4 = and <4 x i1> %extract.i, %2 14260 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 14261 %6 = bitcast <64 x i1> %5 to i64 14262 ret i64 %6 14263} 14264 14265 14266define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14267; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: 14268; VLX: # %bb.0: # %entry 14269; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14270; VLX-NEXT: kmovd %k0, %eax 14271; VLX-NEXT: # kill: 
def $ax killed $ax killed $eax 14272; VLX-NEXT: vzeroupper 14273; VLX-NEXT: retq 14274; 14275; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: 14276; NoVLX: # %bb.0: # %entry 14277; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14278; NoVLX-NEXT: kmovw %k0, %eax 14279; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14280; NoVLX-NEXT: vzeroupper 14281; NoVLX-NEXT: retq 14282entry: 14283 %0 = bitcast <8 x i64> %__a to <8 x i64> 14284 %1 = bitcast <8 x i64> %__b to <8 x i64> 14285 %2 = icmp sge <8 x i64> %0, %1 14286 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14287 %4 = bitcast <16 x i1> %3 to i16 14288 ret i16 %4 14289} 14290 14291define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14292; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: 14293; VLX: # %bb.0: # %entry 14294; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14295; VLX-NEXT: kmovd %k0, %eax 14296; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14297; VLX-NEXT: vzeroupper 14298; VLX-NEXT: retq 14299; 14300; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: 14301; NoVLX: # %bb.0: # %entry 14302; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14303; NoVLX-NEXT: kmovw %k0, %eax 14304; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14305; NoVLX-NEXT: vzeroupper 14306; NoVLX-NEXT: retq 14307entry: 14308 %0 = bitcast <8 x i64> %__a to <8 x i64> 14309 %load = load <8 x i64>, <8 x i64>* %__b 14310 %1 = bitcast <8 x i64> %load to <8 x i64> 14311 %2 = icmp sge <8 x i64> %0, %1 14312 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14313 %4 = bitcast <16 x i1> %3 to i16 14314 ret i16 %4 14315} 14316 14317define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x 
i64> %__b) local_unnamed_addr { 14318; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: 14319; VLX: # %bb.0: # %entry 14320; VLX-NEXT: kmovd %edi, %k1 14321; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14322; VLX-NEXT: kmovd %k0, %eax 14323; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14324; VLX-NEXT: vzeroupper 14325; VLX-NEXT: retq 14326; 14327; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: 14328; NoVLX: # %bb.0: # %entry 14329; NoVLX-NEXT: kmovw %edi, %k1 14330; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14331; NoVLX-NEXT: kmovw %k0, %eax 14332; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14333; NoVLX-NEXT: vzeroupper 14334; NoVLX-NEXT: retq 14335entry: 14336 %0 = bitcast <8 x i64> %__a to <8 x i64> 14337 %1 = bitcast <8 x i64> %__b to <8 x i64> 14338 %2 = icmp sge <8 x i64> %0, %1 14339 %3 = bitcast i8 %__u to <8 x i1> 14340 %4 = and <8 x i1> %2, %3 14341 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14342 %6 = bitcast <16 x i1> %5 to i16 14343 ret i16 %6 14344} 14345 14346define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14347; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: 14348; VLX: # %bb.0: # %entry 14349; VLX-NEXT: kmovd %edi, %k1 14350; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14351; VLX-NEXT: kmovd %k0, %eax 14352; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14353; VLX-NEXT: vzeroupper 14354; VLX-NEXT: retq 14355; 14356; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: 14357; NoVLX: # %bb.0: # %entry 14358; NoVLX-NEXT: kmovw %edi, %k1 14359; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14360; NoVLX-NEXT: kmovw %k0, %eax 14361; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14362; NoVLX-NEXT: vzeroupper 14363; NoVLX-NEXT: retq 14364entry: 14365 %0 = bitcast <8 x i64> %__a to <8 x 
i64> 14366 %load = load <8 x i64>, <8 x i64>* %__b 14367 %1 = bitcast <8 x i64> %load to <8 x i64> 14368 %2 = icmp sge <8 x i64> %0, %1 14369 %3 = bitcast i8 %__u to <8 x i1> 14370 %4 = and <8 x i1> %2, %3 14371 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14372 %6 = bitcast <16 x i1> %5 to i16 14373 ret i16 %6 14374} 14375 14376 14377define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 14378; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14379; VLX: # %bb.0: # %entry 14380; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14381; VLX-NEXT: kmovd %k0, %eax 14382; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14383; VLX-NEXT: vzeroupper 14384; VLX-NEXT: retq 14385; 14386; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14387; NoVLX: # %bb.0: # %entry 14388; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14389; NoVLX-NEXT: kmovw %k0, %eax 14390; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14391; NoVLX-NEXT: vzeroupper 14392; NoVLX-NEXT: retq 14393entry: 14394 %0 = bitcast <8 x i64> %__a to <8 x i64> 14395 %load = load i64, i64* %__b 14396 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14397 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14398 %2 = icmp sge <8 x i64> %0, %1 14399 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14400 %4 = bitcast <16 x i1> %3 to i16 14401 ret i16 %4 14402} 14403 14404define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 14405; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14406; VLX: # %bb.0: # %entry 14407; VLX-NEXT: kmovd %edi, %k1 
14408; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14409; VLX-NEXT: kmovd %k0, %eax 14410; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14411; VLX-NEXT: vzeroupper 14412; VLX-NEXT: retq 14413; 14414; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14415; NoVLX: # %bb.0: # %entry 14416; NoVLX-NEXT: kmovw %edi, %k1 14417; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14418; NoVLX-NEXT: kmovw %k0, %eax 14419; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14420; NoVLX-NEXT: vzeroupper 14421; NoVLX-NEXT: retq 14422entry: 14423 %0 = bitcast <8 x i64> %__a to <8 x i64> 14424 %load = load i64, i64* %__b 14425 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14426 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14427 %2 = icmp sge <8 x i64> %0, %1 14428 %3 = bitcast i8 %__u to <8 x i1> 14429 %4 = and <8 x i1> %3, %2 14430 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14431 %6 = bitcast <16 x i1> %5 to i16 14432 ret i16 %6 14433} 14434 14435 14436define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14437; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: 14438; VLX: # %bb.0: # %entry 14439; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14440; VLX-NEXT: kmovd %k0, %eax 14441; VLX-NEXT: vzeroupper 14442; VLX-NEXT: retq 14443; 14444; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: 14445; NoVLX: # %bb.0: # %entry 14446; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14447; NoVLX-NEXT: kmovw %k0, %eax 14448; NoVLX-NEXT: vzeroupper 14449; NoVLX-NEXT: retq 14450entry: 14451 %0 = bitcast <8 x i64> %__a to <8 x i64> 14452 %1 = bitcast <8 x i64> %__b to <8 x i64> 14453 %2 = icmp sge <8 x i64> %0, %1 14454 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14455 %4 = bitcast <32 x i1> %3 to i32 14456 ret i32 %4 14457} 14458 14459define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14460; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: 14461; VLX: # %bb.0: # %entry 14462; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14463; VLX-NEXT: kmovd %k0, %eax 14464; VLX-NEXT: vzeroupper 14465; VLX-NEXT: retq 14466; 14467; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: 14468; NoVLX: # %bb.0: # %entry 14469; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14470; NoVLX-NEXT: kmovw %k0, %eax 14471; NoVLX-NEXT: vzeroupper 14472; NoVLX-NEXT: retq 14473entry: 14474 %0 = bitcast <8 x i64> %__a to <8 x i64> 14475 %load = load <8 x i64>, <8 x i64>* %__b 14476 %1 = bitcast <8 x i64> %load to <8 x i64> 14477 %2 = icmp sge <8 x i64> %0, %1 14478 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14479 %4 = bitcast <32 x i1> %3 to i32 14480 ret i32 %4 14481} 14482 14483define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14484; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: 14485; VLX: # %bb.0: # %entry 14486; VLX-NEXT: kmovd %edi, %k1 14487; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14488; VLX-NEXT: kmovd %k0, %eax 14489; VLX-NEXT: vzeroupper 14490; VLX-NEXT: retq 14491; 14492; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: 14493; NoVLX: # %bb.0: # %entry 14494; NoVLX-NEXT: kmovw %edi, %k1 14495; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14496; NoVLX-NEXT: 
kmovw %k0, %eax 14497; NoVLX-NEXT: vzeroupper 14498; NoVLX-NEXT: retq 14499entry: 14500 %0 = bitcast <8 x i64> %__a to <8 x i64> 14501 %1 = bitcast <8 x i64> %__b to <8 x i64> 14502 %2 = icmp sge <8 x i64> %0, %1 14503 %3 = bitcast i8 %__u to <8 x i1> 14504 %4 = and <8 x i1> %2, %3 14505 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14506 %6 = bitcast <32 x i1> %5 to i32 14507 ret i32 %6 14508} 14509 14510define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14511; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: 14512; VLX: # %bb.0: # %entry 14513; VLX-NEXT: kmovd %edi, %k1 14514; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14515; VLX-NEXT: kmovd %k0, %eax 14516; VLX-NEXT: vzeroupper 14517; VLX-NEXT: retq 14518; 14519; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: 14520; NoVLX: # %bb.0: # %entry 14521; NoVLX-NEXT: kmovw %edi, %k1 14522; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14523; NoVLX-NEXT: kmovw %k0, %eax 14524; NoVLX-NEXT: vzeroupper 14525; NoVLX-NEXT: retq 14526entry: 14527 %0 = bitcast <8 x i64> %__a to <8 x i64> 14528 %load = load <8 x i64>, <8 x i64>* %__b 14529 %1 = bitcast <8 x i64> %load to <8 x i64> 14530 %2 = icmp sge <8 x i64> %0, %1 14531 %3 = bitcast i8 %__u to <8 x i1> 14532 %4 = and <8 x i1> %2, %3 14533 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14534 %6 = bitcast <32 x i1> %5 to i32 14535 ret i32 %6 14536} 14537 14538 
14539define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 14540; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14541; VLX: # %bb.0: # %entry 14542; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14543; VLX-NEXT: kmovd %k0, %eax 14544; VLX-NEXT: vzeroupper 14545; VLX-NEXT: retq 14546; 14547; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14548; NoVLX: # %bb.0: # %entry 14549; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14550; NoVLX-NEXT: kmovw %k0, %eax 14551; NoVLX-NEXT: vzeroupper 14552; NoVLX-NEXT: retq 14553entry: 14554 %0 = bitcast <8 x i64> %__a to <8 x i64> 14555 %load = load i64, i64* %__b 14556 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14557 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14558 %2 = icmp sge <8 x i64> %0, %1 14559 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14560 %4 = bitcast <32 x i1> %3 to i32 14561 ret i32 %4 14562} 14563 14564define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 14565; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14566; VLX: # %bb.0: # %entry 14567; VLX-NEXT: kmovd %edi, %k1 14568; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14569; VLX-NEXT: kmovd %k0, %eax 14570; VLX-NEXT: vzeroupper 14571; VLX-NEXT: retq 14572; 14573; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14574; NoVLX: # %bb.0: # %entry 14575; NoVLX-NEXT: kmovw %edi, %k1 14576; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14577; NoVLX-NEXT: kmovw %k0, %eax 14578; NoVLX-NEXT: vzeroupper 14579; NoVLX-NEXT: retq 14580entry: 14581 %0 = bitcast <8 x 
i64> %__a to <8 x i64> 14582 %load = load i64, i64* %__b 14583 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14584 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14585 %2 = icmp sge <8 x i64> %0, %1 14586 %3 = bitcast i8 %__u to <8 x i1> 14587 %4 = and <8 x i1> %3, %2 14588 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14589 %6 = bitcast <32 x i1> %5 to i32 14590 ret i32 %6 14591} 14592 14593 14594define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14595; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: 14596; VLX: # %bb.0: # %entry 14597; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14598; VLX-NEXT: kmovq %k0, %rax 14599; VLX-NEXT: vzeroupper 14600; VLX-NEXT: retq 14601; 14602; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: 14603; NoVLX: # %bb.0: # %entry 14604; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14605; NoVLX-NEXT: kmovw %k0, %eax 14606; NoVLX-NEXT: movzwl %ax, %eax 14607; NoVLX-NEXT: vzeroupper 14608; NoVLX-NEXT: retq 14609entry: 14610 %0 = bitcast <8 x i64> %__a to <8 x i64> 14611 %1 = bitcast <8 x i64> %__b to <8 x i64> 14612 %2 = icmp sge <8 x i64> %0, %1 14613 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15> 14614 %4 = bitcast <64 x i1> %3 to i64 14615 ret i64 %4 14616} 14617 14618define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14619; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: 14620; VLX: # %bb.0: # %entry 14621; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14622; VLX-NEXT: kmovq %k0, %rax 14623; VLX-NEXT: vzeroupper 14624; VLX-NEXT: retq 14625; 14626; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: 14627; NoVLX: # %bb.0: # %entry 14628; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14629; NoVLX-NEXT: kmovw %k0, %eax 14630; NoVLX-NEXT: movzwl %ax, %eax 14631; NoVLX-NEXT: vzeroupper 14632; NoVLX-NEXT: retq 14633entry: 14634 %0 = bitcast <8 x i64> %__a to <8 x i64> 14635 %load = load <8 x i64>, <8 x i64>* %__b 14636 %1 = bitcast <8 x i64> %load to <8 x i64> 14637 %2 = icmp sge <8 x i64> %0, %1 14638 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14639 %4 = bitcast <64 x i1> %3 to i64 14640 ret i64 %4 14641} 14642 14643define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14644; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: 14645; VLX: # %bb.0: # %entry 14646; VLX-NEXT: kmovd %edi, %k1 14647; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14648; VLX-NEXT: kmovq %k0, %rax 14649; VLX-NEXT: vzeroupper 14650; VLX-NEXT: retq 14651; 14652; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: 14653; NoVLX: # %bb.0: # 
%entry 14654; NoVLX-NEXT: kmovw %edi, %k1 14655; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14656; NoVLX-NEXT: kmovw %k0, %eax 14657; NoVLX-NEXT: movzwl %ax, %eax 14658; NoVLX-NEXT: vzeroupper 14659; NoVLX-NEXT: retq 14660entry: 14661 %0 = bitcast <8 x i64> %__a to <8 x i64> 14662 %1 = bitcast <8 x i64> %__b to <8 x i64> 14663 %2 = icmp sge <8 x i64> %0, %1 14664 %3 = bitcast i8 %__u to <8 x i1> 14665 %4 = and <8 x i1> %2, %3 14666 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14667 %6 = bitcast <64 x i1> %5 to i64 14668 ret i64 %6 14669} 14670 14671define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 14672; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: 14673; VLX: # %bb.0: # %entry 14674; VLX-NEXT: kmovd %edi, %k1 14675; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14676; VLX-NEXT: kmovq %k0, %rax 14677; VLX-NEXT: vzeroupper 14678; VLX-NEXT: retq 14679; 14680; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: 14681; NoVLX: # %bb.0: # %entry 14682; NoVLX-NEXT: kmovw %edi, %k1 14683; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14684; NoVLX-NEXT: kmovw %k0, %eax 14685; NoVLX-NEXT: movzwl %ax, %eax 14686; NoVLX-NEXT: vzeroupper 14687; NoVLX-NEXT: retq 14688entry: 14689 %0 = bitcast <8 x i64> %__a to <8 x i64> 14690 %load = load <8 x i64>, <8 x i64>* %__b 14691 %1 = bitcast <8 x i64> %load to <8 x i64> 14692 %2 = icmp sge <8 x i64> %0, %1 14693 %3 = bitcast i8 %__u 
to <8 x i1> 14694 %4 = and <8 x i1> %2, %3 14695 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14696 %6 = bitcast <64 x i1> %5 to i64 14697 ret i64 %6 14698} 14699 14700 14701define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 14702; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14703; VLX: # %bb.0: # %entry 14704; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14705; VLX-NEXT: kmovq %k0, %rax 14706; VLX-NEXT: vzeroupper 14707; VLX-NEXT: retq 14708; 14709; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14710; NoVLX: # %bb.0: # %entry 14711; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14712; NoVLX-NEXT: kmovw %k0, %eax 14713; NoVLX-NEXT: movzwl %ax, %eax 14714; NoVLX-NEXT: vzeroupper 14715; NoVLX-NEXT: retq 14716entry: 14717 %0 = bitcast <8 x i64> %__a to <8 x i64> 14718 %load = load i64, i64* %__b 14719 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14720 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14721 %2 = icmp sge <8 x i64> %0, %1 14722 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14723 %4 = bitcast <64 x i1> %3 to i64 14724 ret i64 %4 14725} 14726 14727define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 14728; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14729; VLX: # %bb.0: # %entry 14730; VLX-NEXT: kmovd %edi, %k1 14731; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14732; VLX-NEXT: kmovq %k0, %rax 14733; VLX-NEXT: vzeroupper 14734; VLX-NEXT: retq 14735; 14736; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14737; NoVLX: # %bb.0: # %entry 14738; NoVLX-NEXT: kmovw %edi, %k1 14739; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14740; NoVLX-NEXT: kmovw %k0, %eax 14741; NoVLX-NEXT: movzwl %ax, %eax 14742; NoVLX-NEXT: vzeroupper 14743; NoVLX-NEXT: retq 14744entry: 14745 %0 = bitcast <8 x i64> %__a to <8 x i64> 14746 %load = load i64, i64* %__b 14747 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14748 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14749 %2 = icmp sge <8 x i64> %0, %1 14750 %3 = bitcast i8 %__u to <8 x i1> 14751 %4 = and <8 x i1> %3, %2 14752 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14753 %6 = bitcast <64 x i1> %5 to i64 14754 ret i64 %6 14755} 14756 14757 
14758define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14759; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: 14760; VLX: # %bb.0: # %entry 14761; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 14762; VLX-NEXT: kmovd %k0, %eax 14763; VLX-NEXT: retq 14764; 14765; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: 14766; NoVLX: # %bb.0: # %entry 14767; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14768; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14769; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14770; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14771; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14772; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14773; NoVLX-NEXT: kmovw %k0, %eax 14774; NoVLX-NEXT: vzeroupper 14775; NoVLX-NEXT: retq 14776entry: 14777 %0 = bitcast <2 x i64> %__a to <16 x i8> 14778 %1 = bitcast <2 x i64> %__b to <16 x i8> 14779 %2 = icmp ult <16 x i8> %0, %1 14780 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14781 %4 = bitcast <32 x i1> %3 to i32 14782 ret i32 %4 14783} 14784 14785define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 14786; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: 14787; VLX: # %bb.0: # %entry 14788; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 14789; VLX-NEXT: kmovd %k0, %eax 14790; VLX-NEXT: retq 14791; 14792; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: 14793; NoVLX: # %bb.0: # %entry 14794; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 14795; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14796; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14797; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14798; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14799; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14800; NoVLX-NEXT: kmovw %k0, 
%eax 14801; NoVLX-NEXT: vzeroupper 14802; NoVLX-NEXT: retq 14803entry: 14804 %0 = bitcast <2 x i64> %__a to <16 x i8> 14805 %load = load <2 x i64>, <2 x i64>* %__b 14806 %1 = bitcast <2 x i64> %load to <16 x i8> 14807 %2 = icmp ult <16 x i8> %0, %1 14808 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14809 %4 = bitcast <32 x i1> %3 to i32 14810 ret i32 %4 14811} 14812 14813define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14814; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: 14815; VLX: # %bb.0: # %entry 14816; VLX-NEXT: kmovd %edi, %k1 14817; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} 14818; VLX-NEXT: kmovd %k0, %eax 14819; VLX-NEXT: retq 14820; 14821; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: 14822; NoVLX: # %bb.0: # %entry 14823; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14824; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14825; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14826; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14827; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14828; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14829; NoVLX-NEXT: kmovw %k0, %eax 14830; NoVLX-NEXT: andl %edi, %eax 14831; NoVLX-NEXT: vzeroupper 14832; NoVLX-NEXT: retq 14833entry: 14834 %0 = bitcast <2 x i64> %__a to <16 x i8> 14835 %1 = bitcast <2 x i64> %__b to <16 x i8> 14836 %2 = icmp ult <16 x i8> %0, %1 14837 %3 = bitcast i16 %__u to <16 x i1> 14838 %4 = and <16 x i1> %2, %3 14839 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 
25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14840 %6 = bitcast <32 x i1> %5 to i32 14841 ret i32 %6 14842} 14843 14844define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 14845; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: 14846; VLX: # %bb.0: # %entry 14847; VLX-NEXT: kmovd %edi, %k1 14848; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} 14849; VLX-NEXT: kmovd %k0, %eax 14850; VLX-NEXT: retq 14851; 14852; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: 14853; NoVLX: # %bb.0: # %entry 14854; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 14855; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14856; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14857; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14858; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14859; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14860; NoVLX-NEXT: kmovw %k0, %eax 14861; NoVLX-NEXT: andl %edi, %eax 14862; NoVLX-NEXT: vzeroupper 14863; NoVLX-NEXT: retq 14864entry: 14865 %0 = bitcast <2 x i64> %__a to <16 x i8> 14866 %load = load <2 x i64>, <2 x i64>* %__b 14867 %1 = bitcast <2 x i64> %load to <16 x i8> 14868 %2 = icmp ult <16 x i8> %0, %1 14869 %3 = bitcast i16 %__u to <16 x i1> 14870 %4 = and <16 x i1> %2, %3 14871 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14872 %6 = bitcast <32 x i1> %5 to i32 14873 ret i32 %6 14874} 14875 14876 14877define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14878; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: 14879; VLX: # %bb.0: # %entry 14880; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 14881; VLX-NEXT: kmovq %k0, %rax 14882; VLX-NEXT: retq 14883; 14884; NoVLX-LABEL: 
test_vpcmpultb_v16i1_v64i1_mask: 14885; NoVLX: # %bb.0: # %entry 14886; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14887; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14888; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14889; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14890; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14891; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14892; NoVLX-NEXT: kmovw %k0, %eax 14893; NoVLX-NEXT: movzwl %ax, %eax 14894; NoVLX-NEXT: vzeroupper 14895; NoVLX-NEXT: retq 14896entry: 14897 %0 = bitcast <2 x i64> %__a to <16 x i8> 14898 %1 = bitcast <2 x i64> %__b to <16 x i8> 14899 %2 = icmp ult <16 x i8> %0, %1 14900 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14901 %4 = bitcast <64 x i1> %3 to i64 14902 ret i64 %4 14903} 14904 14905define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 14906; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: 14907; VLX: # %bb.0: # %entry 14908; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 14909; VLX-NEXT: kmovq %k0, %rax 14910; VLX-NEXT: retq 14911; 14912; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: 14913; NoVLX: # %bb.0: # %entry 14914; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 14915; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14916; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14917; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14918; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14919; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14920; NoVLX-NEXT: kmovw %k0, %eax 14921; NoVLX-NEXT: 
movzwl %ax, %eax 14922; NoVLX-NEXT: vzeroupper 14923; NoVLX-NEXT: retq 14924entry: 14925 %0 = bitcast <2 x i64> %__a to <16 x i8> 14926 %load = load <2 x i64>, <2 x i64>* %__b 14927 %1 = bitcast <2 x i64> %load to <16 x i8> 14928 %2 = icmp ult <16 x i8> %0, %1 14929 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14930 %4 = bitcast <64 x i1> %3 to i64 14931 ret i64 %4 14932} 14933 14934define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14935; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: 14936; VLX: # %bb.0: # %entry 14937; VLX-NEXT: kmovd %edi, %k1 14938; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} 14939; VLX-NEXT: kmovq %k0, %rax 14940; VLX-NEXT: retq 14941; 14942; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: 14943; NoVLX: # %bb.0: # %entry 14944; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14945; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14946; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14947; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14948; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14949; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14950; NoVLX-NEXT: kmovw %k0, %eax 14951; NoVLX-NEXT: andl %edi, %eax 14952; NoVLX-NEXT: vzeroupper 14953; NoVLX-NEXT: retq 14954entry: 14955 %0 = bitcast <2 x i64> %__a to <16 x i8> 14956 %1 = bitcast <2 x i64> %__b to <16 x i8> 14957 %2 = icmp ult <16 x i8> %0, %1 14958 %3 = bitcast i16 %__u to <16 x i1> 14959 %4 = and <16 x i1> %2, %3 14960 
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14961 %6 = bitcast <64 x i1> %5 to i64 14962 ret i64 %6 14963} 14964 14965define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 14966; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: 14967; VLX: # %bb.0: # %entry 14968; VLX-NEXT: kmovd %edi, %k1 14969; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} 14970; VLX-NEXT: kmovq %k0, %rax 14971; VLX-NEXT: retq 14972; 14973; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: 14974; NoVLX: # %bb.0: # %entry 14975; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 14976; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14977; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14978; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14979; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 14980; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14981; NoVLX-NEXT: kmovw %k0, %eax 14982; NoVLX-NEXT: andl %edi, %eax 14983; NoVLX-NEXT: vzeroupper 14984; NoVLX-NEXT: retq 14985entry: 14986 %0 = bitcast <2 x i64> %__a to <16 x i8> 14987 %load = load <2 x i64>, <2 x i64>* %__b 14988 %1 = bitcast <2 x i64> %load to <16 x i8> 14989 %2 = icmp ult <16 x i8> %0, %1 14990 %3 = bitcast i16 %__u to <16 x i1> 14991 %4 = and <16 x i1> %2, %3 14992 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14993 %6 = bitcast <64 x i1> %5 to i64 14994 ret i64 %6 14995} 14996 14997 14998define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 14999; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: 15000; VLX: # %bb.0: # %entry 15001; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 15002; VLX-NEXT: kmovq %k0, %rax 15003; VLX-NEXT: vzeroupper 15004; VLX-NEXT: retq 15005; 15006; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: 15007; NoVLX: # %bb.0: # %entry 15008; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 15009; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 15010; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15011; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 15012; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 15013; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15014; NoVLX-NEXT: kmovw %k0, %ecx 15015; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 15016; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 15017; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15018; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15019; NoVLX-NEXT: kmovw %k0, %eax 15020; NoVLX-NEXT: shll $16, %eax 15021; NoVLX-NEXT: orl %ecx, %eax 15022; NoVLX-NEXT: vzeroupper 15023; NoVLX-NEXT: retq 15024entry: 15025 %0 = bitcast <4 x i64> %__a to <32 x i8> 15026 %1 = bitcast <4 x i64> %__b to <32 x i8> 15027 %2 = icmp ult <32 x i8> %0, %1 15028 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, 
i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15029 %4 = bitcast <64 x i1> %3 to i64 15030 ret i64 %4 15031} 15032 15033define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15034; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: 15035; VLX: # %bb.0: # %entry 15036; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0 15037; VLX-NEXT: kmovq %k0, %rax 15038; VLX-NEXT: vzeroupper 15039; VLX-NEXT: retq 15040; 15041; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: 15042; NoVLX: # %bb.0: # %entry 15043; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1 15044; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 15045; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15046; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 15047; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 15048; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15049; NoVLX-NEXT: kmovw %k0, %ecx 15050; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 15051; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 15052; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15053; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15054; NoVLX-NEXT: kmovw %k0, %eax 15055; NoVLX-NEXT: shll $16, %eax 15056; NoVLX-NEXT: orl %ecx, %eax 15057; NoVLX-NEXT: vzeroupper 15058; NoVLX-NEXT: retq 15059entry: 15060 %0 = bitcast <4 x i64> %__a to <32 x i8> 15061 %load = load <4 x i64>, <4 x i64>* %__b 15062 %1 = bitcast <4 x i64> %load to <32 x i8> 15063 %2 = icmp ult <32 x i8> %0, %1 15064 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, 
i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15065 %4 = bitcast <64 x i1> %3 to i64 15066 ret i64 %4 15067} 15068 15069define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15070; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: 15071; VLX: # %bb.0: # %entry 15072; VLX-NEXT: kmovd %edi, %k1 15073; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} 15074; VLX-NEXT: kmovq %k0, %rax 15075; VLX-NEXT: vzeroupper 15076; VLX-NEXT: retq 15077; 15078; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: 15079; NoVLX: # %bb.0: # %entry 15080; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 15081; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 15082; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15083; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 15084; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 15085; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15086; NoVLX-NEXT: kmovw %k0, %eax 15087; NoVLX-NEXT: andl %edi, %eax 15088; NoVLX-NEXT: shrl $16, %edi 15089; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 15090; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 15091; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15092; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15093; NoVLX-NEXT: kmovw %k0, %ecx 15094; NoVLX-NEXT: andl %edi, %ecx 15095; NoVLX-NEXT: shll $16, %ecx 15096; NoVLX-NEXT: movzwl %ax, %eax 15097; NoVLX-NEXT: orl %ecx, %eax 15098; NoVLX-NEXT: vzeroupper 15099; NoVLX-NEXT: retq 15100entry: 15101 %0 = bitcast <4 x i64> %__a to <32 x i8> 15102 %1 = bitcast <4 x i64> %__b to <32 x i8> 15103 %2 = icmp ult <32 x i8> %0, %1 15104 %3 = bitcast i32 %__u to <32 x i1> 15105 %4 = and <32 x i1> %2, %3 15106 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, 
i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15107 %6 = bitcast <64 x i1> %5 to i64 15108 ret i64 %6 15109} 15110 15111define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15112; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: 15113; VLX: # %bb.0: # %entry 15114; VLX-NEXT: kmovd %edi, %k1 15115; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1} 15116; VLX-NEXT: kmovq %k0, %rax 15117; VLX-NEXT: vzeroupper 15118; VLX-NEXT: retq 15119; 15120; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: 15121; NoVLX: # %bb.0: # %entry 15122; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1 15123; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 15124; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15125; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 15126; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 15127; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15128; NoVLX-NEXT: kmovw %k0, %eax 15129; NoVLX-NEXT: andl %edi, %eax 15130; NoVLX-NEXT: shrl $16, %edi 15131; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 15132; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 15133; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15134; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15135; NoVLX-NEXT: kmovw %k0, %ecx 15136; NoVLX-NEXT: andl %edi, %ecx 15137; NoVLX-NEXT: shll $16, %ecx 15138; NoVLX-NEXT: movzwl %ax, %eax 15139; NoVLX-NEXT: orl %ecx, %eax 15140; NoVLX-NEXT: vzeroupper 15141; NoVLX-NEXT: retq 15142entry: 15143 %0 = bitcast <4 x i64> %__a to <32 x i8> 15144 %load = load <4 x i64>, <4 x i64>* %__b 15145 %1 = bitcast <4 x i64> %load to <32 x i8> 15146 %2 = icmp ult <32 x i8> %0, %1 15147 %3 = bitcast i32 %__u to <32 x i1> 15148 %4 = and <32 x i1> %2, %3 15149 %5 = shufflevector <32 x i1> %4, <32 x i1> 
zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15150 %6 = bitcast <64 x i1> %5 to i64 15151 ret i64 %6 15152} 15153 15154 15155define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15156; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: 15157; VLX: # %bb.0: # %entry 15158; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 15159; VLX-NEXT: kmovd %k0, %eax 15160; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15161; VLX-NEXT: retq 15162; 15163; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: 15164; NoVLX: # %bb.0: # %entry 15165; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15166; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15167; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15168; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15169; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15170; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15171; NoVLX-NEXT: kmovw %k0, %eax 15172; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15173; NoVLX-NEXT: vzeroupper 15174; NoVLX-NEXT: retq 15175entry: 15176 %0 = bitcast <2 x i64> %__a to <8 x i16> 15177 %1 = bitcast <2 x i64> %__b to <8 x i16> 15178 %2 = icmp ult <8 x i16> %0, %1 15179 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15180 %4 = bitcast <16 x i1> %3 to i16 15181 ret i16 %4 15182} 15183 15184define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15185; 
VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: 15186; VLX: # %bb.0: # %entry 15187; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 15188; VLX-NEXT: kmovd %k0, %eax 15189; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15190; VLX-NEXT: retq 15191; 15192; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: 15193; NoVLX: # %bb.0: # %entry 15194; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 15195; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15196; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15197; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15198; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15199; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15200; NoVLX-NEXT: kmovw %k0, %eax 15201; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15202; NoVLX-NEXT: vzeroupper 15203; NoVLX-NEXT: retq 15204entry: 15205 %0 = bitcast <2 x i64> %__a to <8 x i16> 15206 %load = load <2 x i64>, <2 x i64>* %__b 15207 %1 = bitcast <2 x i64> %load to <8 x i16> 15208 %2 = icmp ult <8 x i16> %0, %1 15209 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15210 %4 = bitcast <16 x i1> %3 to i16 15211 ret i16 %4 15212} 15213 15214define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15215; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: 15216; VLX: # %bb.0: # %entry 15217; VLX-NEXT: kmovd %edi, %k1 15218; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 15219; VLX-NEXT: kmovd %k0, %eax 15220; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15221; VLX-NEXT: retq 15222; 15223; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: 15224; NoVLX: # %bb.0: # %entry 15225; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15226; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15227; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15228; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15229; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15230; NoVLX-NEXT: kmovw %edi, %k1 
15231; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15232; NoVLX-NEXT: kmovw %k0, %eax 15233; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15234; NoVLX-NEXT: vzeroupper 15235; NoVLX-NEXT: retq 15236entry: 15237 %0 = bitcast <2 x i64> %__a to <8 x i16> 15238 %1 = bitcast <2 x i64> %__b to <8 x i16> 15239 %2 = icmp ult <8 x i16> %0, %1 15240 %3 = bitcast i8 %__u to <8 x i1> 15241 %4 = and <8 x i1> %2, %3 15242 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15243 %6 = bitcast <16 x i1> %5 to i16 15244 ret i16 %6 15245} 15246 15247define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15248; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: 15249; VLX: # %bb.0: # %entry 15250; VLX-NEXT: kmovd %edi, %k1 15251; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 15252; VLX-NEXT: kmovd %k0, %eax 15253; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15254; VLX-NEXT: retq 15255; 15256; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: 15257; NoVLX: # %bb.0: # %entry 15258; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 15259; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15260; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15261; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15262; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15263; NoVLX-NEXT: kmovw %edi, %k1 15264; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15265; NoVLX-NEXT: kmovw %k0, %eax 15266; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15267; NoVLX-NEXT: vzeroupper 15268; NoVLX-NEXT: retq 15269entry: 15270 %0 = bitcast <2 x i64> %__a to <8 x i16> 15271 %load = load <2 x i64>, <2 x i64>* %__b 15272 %1 = bitcast <2 x i64> %load to <8 x i16> 15273 %2 = icmp ult <8 x i16> %0, %1 15274 %3 = bitcast i8 %__u to <8 x i1> 15275 %4 = and <8 x i1> %2, %3 15276 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, 
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15277 %6 = bitcast <16 x i1> %5 to i16 15278 ret i16 %6 15279} 15280 15281 15282define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15283; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: 15284; VLX: # %bb.0: # %entry 15285; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 15286; VLX-NEXT: kmovd %k0, %eax 15287; VLX-NEXT: retq 15288; 15289; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: 15290; NoVLX: # %bb.0: # %entry 15291; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15292; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15293; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15294; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15295; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15296; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15297; NoVLX-NEXT: kmovw %k0, %eax 15298; NoVLX-NEXT: vzeroupper 15299; NoVLX-NEXT: retq 15300entry: 15301 %0 = bitcast <2 x i64> %__a to <8 x i16> 15302 %1 = bitcast <2 x i64> %__b to <8 x i16> 15303 %2 = icmp ult <8 x i16> %0, %1 15304 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15305 %4 = bitcast <32 x i1> %3 to i32 15306 ret i32 %4 15307} 15308 15309define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15310; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: 15311; VLX: # %bb.0: # %entry 15312; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 15313; VLX-NEXT: kmovd %k0, %eax 15314; VLX-NEXT: retq 15315; 15316; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: 15317; NoVLX: # %bb.0: # %entry 15318; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 15319; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15320; NoVLX-NEXT: 
vpternlogq $15, %zmm0, %zmm0, %zmm0 15321; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15322; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15323; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15324; NoVLX-NEXT: kmovw %k0, %eax 15325; NoVLX-NEXT: vzeroupper 15326; NoVLX-NEXT: retq 15327entry: 15328 %0 = bitcast <2 x i64> %__a to <8 x i16> 15329 %load = load <2 x i64>, <2 x i64>* %__b 15330 %1 = bitcast <2 x i64> %load to <8 x i16> 15331 %2 = icmp ult <8 x i16> %0, %1 15332 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15333 %4 = bitcast <32 x i1> %3 to i32 15334 ret i32 %4 15335} 15336 15337define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15338; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: 15339; VLX: # %bb.0: # %entry 15340; VLX-NEXT: kmovd %edi, %k1 15341; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 15342; VLX-NEXT: kmovd %k0, %eax 15343; VLX-NEXT: retq 15344; 15345; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: 15346; NoVLX: # %bb.0: # %entry 15347; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15348; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15349; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15350; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15351; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15352; NoVLX-NEXT: kmovw %edi, %k1 15353; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15354; NoVLX-NEXT: kmovw %k0, %eax 15355; NoVLX-NEXT: vzeroupper 15356; NoVLX-NEXT: retq 15357entry: 15358 %0 = bitcast <2 x i64> %__a to <8 x i16> 15359 %1 = bitcast <2 x i64> %__b to <8 x i16> 15360 %2 = icmp ult <8 x i16> %0, %1 15361 %3 = bitcast i8 %__u to <8 x i1> 15362 %4 = and <8 x i1> %2, %3 15363 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, 
i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15364 %6 = bitcast <32 x i1> %5 to i32 15365 ret i32 %6 15366} 15367 15368define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15369; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: 15370; VLX: # %bb.0: # %entry 15371; VLX-NEXT: kmovd %edi, %k1 15372; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 15373; VLX-NEXT: kmovd %k0, %eax 15374; VLX-NEXT: retq 15375; 15376; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: 15377; NoVLX: # %bb.0: # %entry 15378; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 15379; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15380; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15381; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15382; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15383; NoVLX-NEXT: kmovw %edi, %k1 15384; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15385; NoVLX-NEXT: kmovw %k0, %eax 15386; NoVLX-NEXT: vzeroupper 15387; NoVLX-NEXT: retq 15388entry: 15389 %0 = bitcast <2 x i64> %__a to <8 x i16> 15390 %load = load <2 x i64>, <2 x i64>* %__b 15391 %1 = bitcast <2 x i64> %load to <8 x i16> 15392 %2 = icmp ult <8 x i16> %0, %1 15393 %3 = bitcast i8 %__u to <8 x i1> 15394 %4 = and <8 x i1> %2, %3 15395 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15396 %6 = bitcast <32 x i1> %5 to i32 15397 ret i32 %6 15398} 15399 15400 15401define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15402; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: 
15403; VLX: # %bb.0: # %entry 15404; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 15405; VLX-NEXT: kmovq %k0, %rax 15406; VLX-NEXT: retq 15407; 15408; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: 15409; NoVLX: # %bb.0: # %entry 15410; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15411; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15412; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15413; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15414; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15415; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15416; NoVLX-NEXT: kmovw %k0, %eax 15417; NoVLX-NEXT: movzwl %ax, %eax 15418; NoVLX-NEXT: vzeroupper 15419; NoVLX-NEXT: retq 15420entry: 15421 %0 = bitcast <2 x i64> %__a to <8 x i16> 15422 %1 = bitcast <2 x i64> %__b to <8 x i16> 15423 %2 = icmp ult <8 x i16> %0, %1 15424 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15425 %4 = bitcast <64 x i1> %3 to i64 15426 ret i64 %4 15427} 15428 15429define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15430; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: 15431; VLX: # %bb.0: # %entry 15432; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 15433; VLX-NEXT: kmovq %k0, %rax 15434; VLX-NEXT: retq 15435; 15436; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: 15437; NoVLX: # %bb.0: # %entry 15438; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 15439; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15440; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15441; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15442; 
NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15443; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15444; NoVLX-NEXT: kmovw %k0, %eax 15445; NoVLX-NEXT: movzwl %ax, %eax 15446; NoVLX-NEXT: vzeroupper 15447; NoVLX-NEXT: retq 15448entry: 15449 %0 = bitcast <2 x i64> %__a to <8 x i16> 15450 %load = load <2 x i64>, <2 x i64>* %__b 15451 %1 = bitcast <2 x i64> %load to <8 x i16> 15452 %2 = icmp ult <8 x i16> %0, %1 15453 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15454 %4 = bitcast <64 x i1> %3 to i64 15455 ret i64 %4 15456} 15457 15458define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15459; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: 15460; VLX: # %bb.0: # %entry 15461; VLX-NEXT: kmovd %edi, %k1 15462; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 15463; VLX-NEXT: kmovq %k0, %rax 15464; VLX-NEXT: retq 15465; 15466; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: 15467; NoVLX: # %bb.0: # %entry 15468; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15469; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15470; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15471; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15472; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15473; NoVLX-NEXT: kmovw %edi, %k1 15474; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15475; NoVLX-NEXT: kmovw %k0, %eax 15476; NoVLX-NEXT: movzwl %ax, %eax 15477; NoVLX-NEXT: vzeroupper 15478; NoVLX-NEXT: retq 15479entry: 15480 %0 = bitcast <2 x i64> %__a to <8 x i16> 
15481 %1 = bitcast <2 x i64> %__b to <8 x i16> 15482 %2 = icmp ult <8 x i16> %0, %1 15483 %3 = bitcast i8 %__u to <8 x i1> 15484 %4 = and <8 x i1> %2, %3 15485 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15486 %6 = bitcast <64 x i1> %5 to i64 15487 ret i64 %6 15488} 15489 15490define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15491; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: 15492; VLX: # %bb.0: # %entry 15493; VLX-NEXT: kmovd %edi, %k1 15494; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 15495; VLX-NEXT: kmovq %k0, %rax 15496; VLX-NEXT: retq 15497; 15498; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: 15499; NoVLX: # %bb.0: # %entry 15500; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 15501; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15502; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15503; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15504; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 15505; NoVLX-NEXT: kmovw %edi, %k1 15506; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15507; NoVLX-NEXT: kmovw %k0, %eax 15508; NoVLX-NEXT: movzwl %ax, %eax 15509; NoVLX-NEXT: vzeroupper 15510; NoVLX-NEXT: retq 15511entry: 15512 %0 = bitcast <2 x i64> %__a to <8 x i16> 15513 %load = load <2 x i64>, <2 x i64>* %__b 15514 %1 = bitcast <2 x i64> %load to <8 x i16> 15515 %2 = icmp ult <8 x i16> %0, %1 15516 %3 = bitcast i8 %__u to <8 x i1> 15517 %4 = and <8 x i1> %2, %3 15518 %5 = shufflevector <8 x 
i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15519 %6 = bitcast <64 x i1> %5 to i64 15520 ret i64 %6 15521} 15522 15523 15524define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15525; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: 15526; VLX: # %bb.0: # %entry 15527; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 15528; VLX-NEXT: kmovd %k0, %eax 15529; VLX-NEXT: vzeroupper 15530; VLX-NEXT: retq 15531; 15532; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: 15533; NoVLX: # %bb.0: # %entry 15534; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15535; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15536; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15537; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15538; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15539; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15540; NoVLX-NEXT: kmovw %k0, %eax 15541; NoVLX-NEXT: vzeroupper 15542; NoVLX-NEXT: retq 15543entry: 15544 %0 = bitcast <4 x i64> %__a to <16 x i16> 15545 %1 = bitcast <4 x i64> %__b to <16 x i16> 15546 %2 = icmp ult <16 x i16> %0, %1 15547 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15548 %4 = bitcast <32 x i1> %3 to i32 15549 ret i32 %4 15550} 15551 15552define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x 
i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15553; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: 15554; VLX: # %bb.0: # %entry 15555; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 15556; VLX-NEXT: kmovd %k0, %eax 15557; VLX-NEXT: vzeroupper 15558; VLX-NEXT: retq 15559; 15560; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: 15561; NoVLX: # %bb.0: # %entry 15562; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 15563; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15564; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15565; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15566; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15567; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15568; NoVLX-NEXT: kmovw %k0, %eax 15569; NoVLX-NEXT: vzeroupper 15570; NoVLX-NEXT: retq 15571entry: 15572 %0 = bitcast <4 x i64> %__a to <16 x i16> 15573 %load = load <4 x i64>, <4 x i64>* %__b 15574 %1 = bitcast <4 x i64> %load to <16 x i16> 15575 %2 = icmp ult <16 x i16> %0, %1 15576 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15577 %4 = bitcast <32 x i1> %3 to i32 15578 ret i32 %4 15579} 15580 15581define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15582; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: 15583; VLX: # %bb.0: # %entry 15584; VLX-NEXT: kmovd %edi, %k1 15585; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} 15586; VLX-NEXT: kmovd %k0, %eax 15587; VLX-NEXT: vzeroupper 15588; VLX-NEXT: retq 15589; 15590; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: 15591; NoVLX: # %bb.0: # %entry 15592; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15593; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15594; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15595; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 
15596; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15597; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15598; NoVLX-NEXT: kmovw %k0, %eax 15599; NoVLX-NEXT: andl %edi, %eax 15600; NoVLX-NEXT: vzeroupper 15601; NoVLX-NEXT: retq 15602entry: 15603 %0 = bitcast <4 x i64> %__a to <16 x i16> 15604 %1 = bitcast <4 x i64> %__b to <16 x i16> 15605 %2 = icmp ult <16 x i16> %0, %1 15606 %3 = bitcast i16 %__u to <16 x i1> 15607 %4 = and <16 x i1> %2, %3 15608 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15609 %6 = bitcast <32 x i1> %5 to i32 15610 ret i32 %6 15611} 15612 15613define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15614; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: 15615; VLX: # %bb.0: # %entry 15616; VLX-NEXT: kmovd %edi, %k1 15617; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} 15618; VLX-NEXT: kmovd %k0, %eax 15619; VLX-NEXT: vzeroupper 15620; VLX-NEXT: retq 15621; 15622; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: 15623; NoVLX: # %bb.0: # %entry 15624; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 15625; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15626; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15627; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15628; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15629; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15630; NoVLX-NEXT: kmovw %k0, %eax 15631; NoVLX-NEXT: andl %edi, %eax 15632; NoVLX-NEXT: vzeroupper 15633; NoVLX-NEXT: retq 15634entry: 15635 %0 = bitcast <4 x i64> %__a to <16 x i16> 15636 %load = load <4 x i64>, <4 x i64>* %__b 15637 %1 = bitcast <4 x i64> %load to <16 x i16> 15638 %2 = icmp ult <16 x i16> %0, %1 15639 %3 = bitcast i16 %__u to <16 x i1> 15640 %4 = and <16 x i1> %2, 
%3 15641 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15642 %6 = bitcast <32 x i1> %5 to i32 15643 ret i32 %6 15644} 15645 15646 15647define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15648; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: 15649; VLX: # %bb.0: # %entry 15650; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 15651; VLX-NEXT: kmovq %k0, %rax 15652; VLX-NEXT: vzeroupper 15653; VLX-NEXT: retq 15654; 15655; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: 15656; NoVLX: # %bb.0: # %entry 15657; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15658; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15659; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15660; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15661; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15662; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15663; NoVLX-NEXT: kmovw %k0, %eax 15664; NoVLX-NEXT: movzwl %ax, %eax 15665; NoVLX-NEXT: vzeroupper 15666; NoVLX-NEXT: retq 15667entry: 15668 %0 = bitcast <4 x i64> %__a to <16 x i16> 15669 %1 = bitcast <4 x i64> %__b to <16 x i16> 15670 %2 = icmp ult <16 x i16> %0, %1 15671 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15672 %4 = bitcast <64 x i1> %3 to i64 15673 ret i64 
%4 15674} 15675 15676define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15677; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: 15678; VLX: # %bb.0: # %entry 15679; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 15680; VLX-NEXT: kmovq %k0, %rax 15681; VLX-NEXT: vzeroupper 15682; VLX-NEXT: retq 15683; 15684; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: 15685; NoVLX: # %bb.0: # %entry 15686; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 15687; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15688; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15689; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15690; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15691; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15692; NoVLX-NEXT: kmovw %k0, %eax 15693; NoVLX-NEXT: movzwl %ax, %eax 15694; NoVLX-NEXT: vzeroupper 15695; NoVLX-NEXT: retq 15696entry: 15697 %0 = bitcast <4 x i64> %__a to <16 x i16> 15698 %load = load <4 x i64>, <4 x i64>* %__b 15699 %1 = bitcast <4 x i64> %load to <16 x i16> 15700 %2 = icmp ult <16 x i16> %0, %1 15701 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15702 %4 = bitcast <64 x i1> %3 to i64 15703 ret i64 %4 15704} 15705 15706define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15707; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: 15708; VLX: # %bb.0: # %entry 15709; VLX-NEXT: kmovd %edi, %k1 15710; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} 
15711; VLX-NEXT: kmovq %k0, %rax 15712; VLX-NEXT: vzeroupper 15713; VLX-NEXT: retq 15714; 15715; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: 15716; NoVLX: # %bb.0: # %entry 15717; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15718; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15719; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15720; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15721; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15722; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15723; NoVLX-NEXT: kmovw %k0, %eax 15724; NoVLX-NEXT: andl %edi, %eax 15725; NoVLX-NEXT: vzeroupper 15726; NoVLX-NEXT: retq 15727entry: 15728 %0 = bitcast <4 x i64> %__a to <16 x i16> 15729 %1 = bitcast <4 x i64> %__b to <16 x i16> 15730 %2 = icmp ult <16 x i16> %0, %1 15731 %3 = bitcast i16 %__u to <16 x i1> 15732 %4 = and <16 x i1> %2, %3 15733 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15734 %6 = bitcast <64 x i1> %5 to i64 15735 ret i64 %6 15736} 15737 15738define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 15739; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: 15740; VLX: # %bb.0: # %entry 15741; VLX-NEXT: kmovd %edi, %k1 15742; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} 15743; VLX-NEXT: kmovq %k0, %rax 15744; VLX-NEXT: vzeroupper 15745; VLX-NEXT: retq 15746; 15747; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: 15748; NoVLX: # %bb.0: # %entry 15749; NoVLX-NEXT: vpmaxuw (%rsi), 
%ymm0, %ymm1 15750; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15751; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15752; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15753; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15754; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15755; NoVLX-NEXT: kmovw %k0, %eax 15756; NoVLX-NEXT: andl %edi, %eax 15757; NoVLX-NEXT: vzeroupper 15758; NoVLX-NEXT: retq 15759entry: 15760 %0 = bitcast <4 x i64> %__a to <16 x i16> 15761 %load = load <4 x i64>, <4 x i64>* %__b 15762 %1 = bitcast <4 x i64> %load to <16 x i16> 15763 %2 = icmp ult <16 x i16> %0, %1 15764 %3 = bitcast i16 %__u to <16 x i1> 15765 %4 = and <16 x i1> %2, %3 15766 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15767 %6 = bitcast <64 x i1> %5 to i64 15768 ret i64 %6 15769} 15770 15771 15772define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 15773; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: 15774; VLX: # %bb.0: # %entry 15775; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 15776; VLX-NEXT: kmovq %k0, %rax 15777; VLX-NEXT: vzeroupper 15778; VLX-NEXT: retq 15779; 15780; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: 15781; NoVLX: # %bb.0: # %entry 15782; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 15783; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 15784; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15785; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15786; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15787; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 
15788; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15789; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15790; NoVLX-NEXT: kmovw %k0, %ecx 15791; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0 15792; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0 15793; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15794; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15795; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15796; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15797; NoVLX-NEXT: kmovw %k0, %eax 15798; NoVLX-NEXT: shll $16, %eax 15799; NoVLX-NEXT: orl %ecx, %eax 15800; NoVLX-NEXT: vzeroupper 15801; NoVLX-NEXT: retq 15802entry: 15803 %0 = bitcast <8 x i64> %__a to <32 x i16> 15804 %1 = bitcast <8 x i64> %__b to <32 x i16> 15805 %2 = icmp ult <32 x i16> %0, %1 15806 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15807 %4 = bitcast <64 x i1> %3 to i64 15808 ret i64 %4 15809} 15810 15811define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 15812; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: 15813; VLX: # %bb.0: # %entry 15814; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0 15815; VLX-NEXT: kmovq %k0, %rax 15816; VLX-NEXT: vzeroupper 15817; VLX-NEXT: retq 15818; 15819; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: 15820; NoVLX: # %bb.0: # %entry 15821; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 15822; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2 15823; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 15824; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15825; 
NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15826; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15827; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15828; NoVLX-NEXT: kmovw %k0, %ecx 15829; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm1, %ymm0 15830; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0 15831; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15832; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15833; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15834; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15835; NoVLX-NEXT: kmovw %k0, %eax 15836; NoVLX-NEXT: shll $16, %eax 15837; NoVLX-NEXT: orl %ecx, %eax 15838; NoVLX-NEXT: vzeroupper 15839; NoVLX-NEXT: retq 15840entry: 15841 %0 = bitcast <8 x i64> %__a to <32 x i16> 15842 %load = load <8 x i64>, <8 x i64>* %__b 15843 %1 = bitcast <8 x i64> %load to <32 x i16> 15844 %2 = icmp ult <32 x i16> %0, %1 15845 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15846 %4 = bitcast <64 x i1> %3 to i64 15847 ret i64 %4 15848} 15849 15850define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 15851; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: 15852; VLX: # %bb.0: # %entry 15853; VLX-NEXT: kmovd %edi, %k1 15854; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} 15855; VLX-NEXT: kmovq %k0, %rax 15856; VLX-NEXT: vzeroupper 15857; VLX-NEXT: retq 15858; 15859; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: 15860; NoVLX: # %bb.0: # %entry 15861; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 15862; NoVLX-NEXT: 
vpcmpeqw %ymm2, %ymm0, %ymm2 15863; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 15864; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 15865; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 15866; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 15867; NoVLX-NEXT: kmovw %k0, %eax 15868; NoVLX-NEXT: andl %edi, %eax 15869; NoVLX-NEXT: shrl $16, %edi 15870; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15871; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 15872; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15873; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15874; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15875; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15876; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15877; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15878; NoVLX-NEXT: kmovw %k0, %ecx 15879; NoVLX-NEXT: andl %edi, %ecx 15880; NoVLX-NEXT: shll $16, %ecx 15881; NoVLX-NEXT: movzwl %ax, %eax 15882; NoVLX-NEXT: orl %ecx, %eax 15883; NoVLX-NEXT: vzeroupper 15884; NoVLX-NEXT: retq 15885entry: 15886 %0 = bitcast <8 x i64> %__a to <32 x i16> 15887 %1 = bitcast <8 x i64> %__b to <32 x i16> 15888 %2 = icmp ult <32 x i16> %0, %1 15889 %3 = bitcast i32 %__u to <32 x i1> 15890 %4 = and <32 x i1> %2, %3 15891 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15892 %6 = bitcast <64 x i1> %5 to i64 15893 ret i64 %6 15894} 15895 15896define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 15897; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: 
15898; VLX: # %bb.0: # %entry 15899; VLX-NEXT: kmovd %edi, %k1 15900; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1} 15901; VLX-NEXT: kmovq %k0, %rax 15902; VLX-NEXT: vzeroupper 15903; VLX-NEXT: retq 15904; 15905; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: 15906; NoVLX: # %bb.0: # %entry 15907; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 15908; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 15909; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 15910; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 15911; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 15912; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15913; NoVLX-NEXT: kmovw %k0, %eax 15914; NoVLX-NEXT: andl %edi, %eax 15915; NoVLX-NEXT: shrl $16, %edi 15916; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15917; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1 15918; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15919; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15920; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15921; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 15922; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15923; NoVLX-NEXT: kmovw %k0, %ecx 15924; NoVLX-NEXT: andl %edi, %ecx 15925; NoVLX-NEXT: shll $16, %ecx 15926; NoVLX-NEXT: movzwl %ax, %eax 15927; NoVLX-NEXT: orl %ecx, %eax 15928; NoVLX-NEXT: vzeroupper 15929; NoVLX-NEXT: retq 15930entry: 15931 %0 = bitcast <8 x i64> %__a to <32 x i16> 15932 %load = load <8 x i64>, <8 x i64>* %__b 15933 %1 = bitcast <8 x i64> %load to <32 x i16> 15934 %2 = icmp ult <32 x i16> %0, %1 15935 %3 = bitcast i32 %__u to <32 x i1> 15936 %4 = and <32 x i1> %2, %3 15937 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 
51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15938 %6 = bitcast <64 x i1> %5 to i64 15939 ret i64 %6 15940} 15941 15942 15943define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15944; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: 15945; VLX: # %bb.0: # %entry 15946; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 15947; VLX-NEXT: kmovd %k0, %eax 15948; VLX-NEXT: # kill: def $al killed $al killed $eax 15949; VLX-NEXT: retq 15950; 15951; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: 15952; NoVLX: # %bb.0: # %entry 15953; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 15954; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15955; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15956; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15957; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15958; NoVLX-NEXT: kmovw %k0, %eax 15959; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15960; NoVLX-NEXT: vzeroupper 15961; NoVLX-NEXT: retq 15962entry: 15963 %0 = bitcast <2 x i64> %__a to <4 x i32> 15964 %1 = bitcast <2 x i64> %__b to <4 x i32> 15965 %2 = icmp ult <4 x i32> %0, %1 15966 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15967 %4 = bitcast <8 x i1> %3 to i8 15968 ret i8 %4 15969} 15970 15971define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 15972; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: 15973; VLX: # %bb.0: # %entry 15974; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 15975; VLX-NEXT: kmovd %k0, %eax 15976; VLX-NEXT: # kill: def $al killed $al killed $eax 15977; VLX-NEXT: retq 15978; 15979; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: 15980; NoVLX: # %bb.0: # %entry 15981; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15982; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 15983; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15984; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15985; NoVLX-NEXT: 
kshiftrw $12, %k0, %k0 15986; NoVLX-NEXT: kmovw %k0, %eax 15987; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15988; NoVLX-NEXT: vzeroupper 15989; NoVLX-NEXT: retq 15990entry: 15991 %0 = bitcast <2 x i64> %__a to <4 x i32> 15992 %load = load <2 x i64>, <2 x i64>* %__b 15993 %1 = bitcast <2 x i64> %load to <4 x i32> 15994 %2 = icmp ult <4 x i32> %0, %1 15995 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15996 %4 = bitcast <8 x i1> %3 to i8 15997 ret i8 %4 15998} 15999 16000define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16001; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: 16002; VLX: # %bb.0: # %entry 16003; VLX-NEXT: kmovd %edi, %k1 16004; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16005; VLX-NEXT: kmovd %k0, %eax 16006; VLX-NEXT: # kill: def $al killed $al killed $eax 16007; VLX-NEXT: retq 16008; 16009; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: 16010; NoVLX: # %bb.0: # %entry 16011; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16012; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16013; NoVLX-NEXT: kmovw %edi, %k1 16014; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16015; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16016; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16017; NoVLX-NEXT: kmovw %k0, %eax 16018; NoVLX-NEXT: # kill: def $al killed $al killed $eax 16019; NoVLX-NEXT: vzeroupper 16020; NoVLX-NEXT: retq 16021entry: 16022 %0 = bitcast <2 x i64> %__a to <4 x i32> 16023 %1 = bitcast <2 x i64> %__b to <4 x i32> 16024 %2 = icmp ult <4 x i32> %0, %1 16025 %3 = bitcast i8 %__u to <8 x i1> 16026 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16027 %4 = and <4 x i1> %2, %extract.i 16028 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 16029 %6 = bitcast <8 x i1> %5 to i8 
16030 ret i8 %6 16031} 16032 16033define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16034; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: 16035; VLX: # %bb.0: # %entry 16036; VLX-NEXT: kmovd %edi, %k1 16037; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16038; VLX-NEXT: kmovd %k0, %eax 16039; VLX-NEXT: # kill: def $al killed $al killed $eax 16040; VLX-NEXT: retq 16041; 16042; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: 16043; NoVLX: # %bb.0: # %entry 16044; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16045; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16046; NoVLX-NEXT: kmovw %edi, %k1 16047; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16048; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16049; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16050; NoVLX-NEXT: kmovw %k0, %eax 16051; NoVLX-NEXT: # kill: def $al killed $al killed $eax 16052; NoVLX-NEXT: vzeroupper 16053; NoVLX-NEXT: retq 16054entry: 16055 %0 = bitcast <2 x i64> %__a to <4 x i32> 16056 %load = load <2 x i64>, <2 x i64>* %__b 16057 %1 = bitcast <2 x i64> %load to <4 x i32> 16058 %2 = icmp ult <4 x i32> %0, %1 16059 %3 = bitcast i8 %__u to <8 x i1> 16060 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16061 %4 = and <4 x i1> %2, %extract.i 16062 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 16063 %6 = bitcast <8 x i1> %5 to i8 16064 ret i8 %6 16065} 16066 16067 16068define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 16069; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: 16070; VLX: # %bb.0: # %entry 16071; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16072; VLX-NEXT: kmovd %k0, %eax 16073; VLX-NEXT: # kill: def $al killed $al killed $eax 16074; VLX-NEXT: retq 16075; 16076; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: 16077; NoVLX: # %bb.0: # %entry 
16078; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16079; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 16080; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16081; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16082; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16083; NoVLX-NEXT: kmovw %k0, %eax 16084; NoVLX-NEXT: # kill: def $al killed $al killed $eax 16085; NoVLX-NEXT: vzeroupper 16086; NoVLX-NEXT: retq 16087entry: 16088 %0 = bitcast <2 x i64> %__a to <4 x i32> 16089 %load = load i32, i32* %__b 16090 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16091 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16092 %2 = icmp ult <4 x i32> %0, %1 16093 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 16094 %4 = bitcast <8 x i1> %3 to i8 16095 ret i8 %4 16096} 16097 16098define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 16099; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: 16100; VLX: # %bb.0: # %entry 16101; VLX-NEXT: kmovd %edi, %k1 16102; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16103; VLX-NEXT: kmovd %k0, %eax 16104; VLX-NEXT: # kill: def $al killed $al killed $eax 16105; VLX-NEXT: retq 16106; 16107; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: 16108; NoVLX: # %bb.0: # %entry 16109; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16110; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 16111; NoVLX-NEXT: kmovw %edi, %k1 16112; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16113; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16114; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16115; NoVLX-NEXT: kmovw %k0, %eax 16116; NoVLX-NEXT: # kill: def $al killed $al killed $eax 16117; NoVLX-NEXT: vzeroupper 16118; NoVLX-NEXT: retq 16119entry: 16120 %0 = bitcast <2 x i64> %__a to <4 x i32> 16121 %load = load i32, i32* %__b 16122 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16123 %1 = 
shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16124 %2 = icmp ult <4 x i32> %0, %1 16125 %3 = bitcast i8 %__u to <8 x i1> 16126 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16127 %4 = and <4 x i1> %extract.i, %2 16128 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 16129 %6 = bitcast <8 x i1> %5 to i8 16130 ret i8 %6 16131} 16132 16133 16134define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16135; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: 16136; VLX: # %bb.0: # %entry 16137; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 16138; VLX-NEXT: kmovd %k0, %eax 16139; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16140; VLX-NEXT: retq 16141; 16142; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: 16143; NoVLX: # %bb.0: # %entry 16144; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16145; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16146; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16147; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16148; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16149; NoVLX-NEXT: kmovw %k0, %eax 16150; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16151; NoVLX-NEXT: vzeroupper 16152; NoVLX-NEXT: retq 16153entry: 16154 %0 = bitcast <2 x i64> %__a to <4 x i32> 16155 %1 = bitcast <2 x i64> %__b to <4 x i32> 16156 %2 = icmp ult <4 x i32> %0, %1 16157 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16158 %4 = bitcast <16 x i1> %3 to i16 16159 ret i16 %4 16160} 16161 16162define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16163; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: 16164; VLX: # %bb.0: # %entry 16165; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 16166; VLX-NEXT: 
kmovd %k0, %eax 16167; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16168; VLX-NEXT: retq 16169; 16170; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: 16171; NoVLX: # %bb.0: # %entry 16172; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16173; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 16174; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16175; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16176; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16177; NoVLX-NEXT: kmovw %k0, %eax 16178; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16179; NoVLX-NEXT: vzeroupper 16180; NoVLX-NEXT: retq 16181entry: 16182 %0 = bitcast <2 x i64> %__a to <4 x i32> 16183 %load = load <2 x i64>, <2 x i64>* %__b 16184 %1 = bitcast <2 x i64> %load to <4 x i32> 16185 %2 = icmp ult <4 x i32> %0, %1 16186 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16187 %4 = bitcast <16 x i1> %3 to i16 16188 ret i16 %4 16189} 16190 16191define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16192; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: 16193; VLX: # %bb.0: # %entry 16194; VLX-NEXT: kmovd %edi, %k1 16195; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16196; VLX-NEXT: kmovd %k0, %eax 16197; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16198; VLX-NEXT: retq 16199; 16200; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: 16201; NoVLX: # %bb.0: # %entry 16202; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16203; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16204; NoVLX-NEXT: kmovw %edi, %k1 16205; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16206; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16207; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16208; NoVLX-NEXT: kmovw %k0, %eax 16209; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16210; NoVLX-NEXT: vzeroupper 16211; NoVLX-NEXT: retq 16212entry: 16213 %0 = bitcast 
<2 x i64> %__a to <4 x i32> 16214 %1 = bitcast <2 x i64> %__b to <4 x i32> 16215 %2 = icmp ult <4 x i32> %0, %1 16216 %3 = bitcast i8 %__u to <8 x i1> 16217 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16218 %4 = and <4 x i1> %2, %extract.i 16219 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16220 %6 = bitcast <16 x i1> %5 to i16 16221 ret i16 %6 16222} 16223 16224define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16225; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: 16226; VLX: # %bb.0: # %entry 16227; VLX-NEXT: kmovd %edi, %k1 16228; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16229; VLX-NEXT: kmovd %k0, %eax 16230; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16231; VLX-NEXT: retq 16232; 16233; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: 16234; NoVLX: # %bb.0: # %entry 16235; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16236; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16237; NoVLX-NEXT: kmovw %edi, %k1 16238; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16239; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16240; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16241; NoVLX-NEXT: kmovw %k0, %eax 16242; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16243; NoVLX-NEXT: vzeroupper 16244; NoVLX-NEXT: retq 16245entry: 16246 %0 = bitcast <2 x i64> %__a to <4 x i32> 16247 %load = load <2 x i64>, <2 x i64>* %__b 16248 %1 = bitcast <2 x i64> %load to <4 x i32> 16249 %2 = icmp ult <4 x i32> %0, %1 16250 %3 = bitcast i8 %__u to <8 x i1> 16251 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16252 %4 = and <4 x i1> %2, %extract.i 16253 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16254 %6 = bitcast <16 x i1> %5 to i16 16255 ret i16 %6 16256} 16257 16258 16259define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 16260; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: 16261; VLX: # %bb.0: # %entry 16262; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16263; VLX-NEXT: kmovd %k0, %eax 16264; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16265; VLX-NEXT: retq 16266; 16267; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: 16268; NoVLX: # %bb.0: # %entry 16269; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16270; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 16271; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16272; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16273; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16274; NoVLX-NEXT: kmovw %k0, %eax 16275; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16276; NoVLX-NEXT: vzeroupper 16277; NoVLX-NEXT: retq 16278entry: 16279 %0 = bitcast <2 x i64> %__a to <4 x i32> 16280 %load = load i32, i32* %__b 16281 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16282 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16283 %2 = icmp ult <4 x i32> %0, %1 16284 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16285 %4 = bitcast <16 x i1> %3 to i16 16286 ret i16 %4 16287} 16288 16289define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 16290; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: 16291; VLX: # %bb.0: # %entry 16292; VLX-NEXT: kmovd %edi, %k1 16293; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16294; VLX-NEXT: kmovd %k0, %eax 16295; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16296; VLX-NEXT: retq 16297; 16298; NoVLX-LABEL: 
test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: 16299; NoVLX: # %bb.0: # %entry 16300; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16301; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 16302; NoVLX-NEXT: kmovw %edi, %k1 16303; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16304; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16305; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16306; NoVLX-NEXT: kmovw %k0, %eax 16307; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16308; NoVLX-NEXT: vzeroupper 16309; NoVLX-NEXT: retq 16310entry: 16311 %0 = bitcast <2 x i64> %__a to <4 x i32> 16312 %load = load i32, i32* %__b 16313 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16314 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16315 %2 = icmp ult <4 x i32> %0, %1 16316 %3 = bitcast i8 %__u to <8 x i1> 16317 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16318 %4 = and <4 x i1> %extract.i, %2 16319 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16320 %6 = bitcast <16 x i1> %5 to i16 16321 ret i16 %6 16322} 16323 16324 16325define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16326; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: 16327; VLX: # %bb.0: # %entry 16328; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 16329; VLX-NEXT: kmovd %k0, %eax 16330; VLX-NEXT: retq 16331; 16332; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: 16333; NoVLX: # %bb.0: # %entry 16334; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16335; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16336; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16337; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16338; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16339; NoVLX-NEXT: kmovw %k0, %eax 16340; NoVLX-NEXT: vzeroupper 16341; NoVLX-NEXT: retq 16342entry: 16343 %0 = bitcast <2 x i64> %__a 
to <4 x i32> 16344 %1 = bitcast <2 x i64> %__b to <4 x i32> 16345 %2 = icmp ult <4 x i32> %0, %1 16346 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16347 %4 = bitcast <32 x i1> %3 to i32 16348 ret i32 %4 16349} 16350 16351define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16352; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: 16353; VLX: # %bb.0: # %entry 16354; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 16355; VLX-NEXT: kmovd %k0, %eax 16356; VLX-NEXT: retq 16357; 16358; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: 16359; NoVLX: # %bb.0: # %entry 16360; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16361; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 16362; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16363; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16364; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16365; NoVLX-NEXT: kmovw %k0, %eax 16366; NoVLX-NEXT: vzeroupper 16367; NoVLX-NEXT: retq 16368entry: 16369 %0 = bitcast <2 x i64> %__a to <4 x i32> 16370 %load = load <2 x i64>, <2 x i64>* %__b 16371 %1 = bitcast <2 x i64> %load to <4 x i32> 16372 %2 = icmp ult <4 x i32> %0, %1 16373 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16374 %4 = bitcast <32 x i1> %3 to i32 16375 ret i32 %4 16376} 16377 16378define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16379; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: 16380; VLX: # %bb.0: # %entry 16381; VLX-NEXT: kmovd %edi, %k1 16382; 
VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16383; VLX-NEXT: kmovd %k0, %eax 16384; VLX-NEXT: retq 16385; 16386; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: 16387; NoVLX: # %bb.0: # %entry 16388; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16389; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16390; NoVLX-NEXT: kmovw %edi, %k1 16391; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16392; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16393; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16394; NoVLX-NEXT: kmovw %k0, %eax 16395; NoVLX-NEXT: vzeroupper 16396; NoVLX-NEXT: retq 16397entry: 16398 %0 = bitcast <2 x i64> %__a to <4 x i32> 16399 %1 = bitcast <2 x i64> %__b to <4 x i32> 16400 %2 = icmp ult <4 x i32> %0, %1 16401 %3 = bitcast i8 %__u to <8 x i1> 16402 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16403 %4 = and <4 x i1> %2, %extract.i 16404 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16405 %6 = bitcast <32 x i1> %5 to i32 16406 ret i32 %6 16407} 16408 16409define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16410; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: 16411; VLX: # %bb.0: # %entry 16412; VLX-NEXT: kmovd %edi, %k1 16413; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16414; VLX-NEXT: kmovd %k0, %eax 16415; VLX-NEXT: retq 16416; 16417; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: 16418; NoVLX: # %bb.0: # %entry 16419; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16420; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16421; NoVLX-NEXT: kmovw %edi, %k1 16422; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16423; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16424; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 16425; NoVLX-NEXT: kmovw %k0, %eax 16426; NoVLX-NEXT: vzeroupper 16427; NoVLX-NEXT: retq 16428entry: 16429 %0 = bitcast <2 x i64> %__a to <4 x i32> 16430 %load = load <2 x i64>, <2 x i64>* %__b 16431 %1 = bitcast <2 x i64> %load to <4 x i32> 16432 %2 = icmp ult <4 x i32> %0, %1 16433 %3 = bitcast i8 %__u to <8 x i1> 16434 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16435 %4 = and <4 x i1> %2, %extract.i 16436 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16437 %6 = bitcast <32 x i1> %5 to i32 16438 ret i32 %6 16439} 16440 16441 16442define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 16443; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: 16444; VLX: # %bb.0: # %entry 16445; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16446; VLX-NEXT: kmovd %k0, %eax 16447; VLX-NEXT: retq 16448; 16449; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: 16450; NoVLX: # %bb.0: # %entry 16451; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16452; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 16453; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16454; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16455; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16456; NoVLX-NEXT: kmovw %k0, %eax 16457; NoVLX-NEXT: vzeroupper 16458; NoVLX-NEXT: retq 16459entry: 16460 %0 = bitcast <2 x i64> %__a to <4 x i32> 16461 %load = load i32, i32* %__b 16462 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16463 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16464 %2 = icmp ult <4 x i32> %0, %1 16465 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16466 %4 = bitcast <32 x i1> %3 to i32 16467 ret i32 %4 16468} 16469 16470define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 16471; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: 16472; VLX: # %bb.0: # %entry 16473; VLX-NEXT: kmovd %edi, %k1 16474; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16475; VLX-NEXT: kmovd %k0, %eax 16476; VLX-NEXT: retq 16477; 16478; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: 16479; NoVLX: # %bb.0: # %entry 16480; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16481; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 16482; NoVLX-NEXT: kmovw %edi, %k1 16483; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16484; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16485; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16486; NoVLX-NEXT: kmovw %k0, %eax 16487; NoVLX-NEXT: vzeroupper 16488; NoVLX-NEXT: retq 16489entry: 16490 %0 = bitcast <2 x i64> %__a to <4 x i32> 16491 %load = load i32, i32* %__b 16492 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16493 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16494 %2 = icmp ult <4 x i32> %0, %1 16495 %3 = bitcast i8 %__u to <8 x i1> 16496 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16497 %4 = and <4 x i1> %extract.i, %2 16498 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16499 %6 = bitcast <32 x i1> %5 to i32 16500 ret i32 %6 16501} 16502 16503 16504define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> 
%__b) local_unnamed_addr { 16505; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: 16506; VLX: # %bb.0: # %entry 16507; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 16508; VLX-NEXT: kmovq %k0, %rax 16509; VLX-NEXT: retq 16510; 16511; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: 16512; NoVLX: # %bb.0: # %entry 16513; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16514; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16515; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16516; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16517; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16518; NoVLX-NEXT: kmovw %k0, %eax 16519; NoVLX-NEXT: movzwl %ax, %eax 16520; NoVLX-NEXT: vzeroupper 16521; NoVLX-NEXT: retq 16522entry: 16523 %0 = bitcast <2 x i64> %__a to <4 x i32> 16524 %1 = bitcast <2 x i64> %__b to <4 x i32> 16525 %2 = icmp ult <4 x i32> %0, %1 16526 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16527 %4 = bitcast <64 x i1> %3 to i64 16528 ret i64 %4 16529} 16530 16531define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16532; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: 16533; VLX: # %bb.0: # %entry 16534; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 16535; VLX-NEXT: kmovq %k0, %rax 16536; VLX-NEXT: retq 16537; 16538; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: 16539; NoVLX: # %bb.0: # %entry 16540; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16541; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 16542; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16543; NoVLX-NEXT: kshiftlw $12, %k0, %k0 
16544; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16545; NoVLX-NEXT: kmovw %k0, %eax 16546; NoVLX-NEXT: movzwl %ax, %eax 16547; NoVLX-NEXT: vzeroupper 16548; NoVLX-NEXT: retq 16549entry: 16550 %0 = bitcast <2 x i64> %__a to <4 x i32> 16551 %load = load <2 x i64>, <2 x i64>* %__b 16552 %1 = bitcast <2 x i64> %load to <4 x i32> 16553 %2 = icmp ult <4 x i32> %0, %1 16554 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16555 %4 = bitcast <64 x i1> %3 to i64 16556 ret i64 %4 16557} 16558 16559define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16560; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: 16561; VLX: # %bb.0: # %entry 16562; VLX-NEXT: kmovd %edi, %k1 16563; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16564; VLX-NEXT: kmovq %k0, %rax 16565; VLX-NEXT: retq 16566; 16567; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: 16568; NoVLX: # %bb.0: # %entry 16569; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16570; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16571; NoVLX-NEXT: kmovw %edi, %k1 16572; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16573; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16574; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16575; NoVLX-NEXT: kmovw %k0, %eax 16576; NoVLX-NEXT: movzwl %ax, %eax 16577; NoVLX-NEXT: vzeroupper 16578; NoVLX-NEXT: retq 16579entry: 16580 %0 = bitcast <2 x i64> %__a to <4 x i32> 16581 %1 = bitcast <2 x i64> %__b to <4 x i32> 16582 %2 = icmp ult <4 x i32> %0, %1 16583 %3 = bitcast i8 %__u to <8 
x i1> 16584 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16585 %4 = and <4 x i1> %2, %extract.i 16586 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16587 %6 = bitcast <64 x i1> %5 to i64 16588 ret i64 %6 16589} 16590 16591define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 16592; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: 16593; VLX: # %bb.0: # %entry 16594; VLX-NEXT: kmovd %edi, %k1 16595; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16596; VLX-NEXT: kmovq %k0, %rax 16597; VLX-NEXT: retq 16598; 16599; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: 16600; NoVLX: # %bb.0: # %entry 16601; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16602; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16603; NoVLX-NEXT: kmovw %edi, %k1 16604; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16605; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16606; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16607; NoVLX-NEXT: kmovw %k0, %eax 16608; NoVLX-NEXT: movzwl %ax, %eax 16609; NoVLX-NEXT: vzeroupper 16610; NoVLX-NEXT: retq 16611entry: 16612 %0 = bitcast <2 x i64> %__a to <4 x i32> 16613 %load = load <2 x i64>, <2 x i64>* %__b 16614 %1 = bitcast <2 x i64> %load to <4 x i32> 16615 %2 = icmp ult <4 x i32> %0, %1 16616 %3 = bitcast i8 %__u to <8 x i1> 16617 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16618 %4 = and <4 x i1> %2, %extract.i 16619 %5 = 
shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16620 %6 = bitcast <64 x i1> %5 to i64 16621 ret i64 %6 16622} 16623 16624 16625define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { 16626; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: 16627; VLX: # %bb.0: # %entry 16628; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16629; VLX-NEXT: kmovq %k0, %rax 16630; VLX-NEXT: retq 16631; 16632; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: 16633; NoVLX: # %bb.0: # %entry 16634; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16635; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 16636; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16637; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16638; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16639; NoVLX-NEXT: kmovw %k0, %eax 16640; NoVLX-NEXT: movzwl %ax, %eax 16641; NoVLX-NEXT: vzeroupper 16642; NoVLX-NEXT: retq 16643entry: 16644 %0 = bitcast <2 x i64> %__a to <4 x i32> 16645 %load = load i32, i32* %__b 16646 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16647 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16648 %2 = icmp ult <4 x i32> %0, %1 16649 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 
4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16650 %4 = bitcast <64 x i1> %3 to i64 16651 ret i64 %4 16652} 16653 16654define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { 16655; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: 16656; VLX: # %bb.0: # %entry 16657; VLX-NEXT: kmovd %edi, %k1 16658; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16659; VLX-NEXT: kmovq %k0, %rax 16660; VLX-NEXT: retq 16661; 16662; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: 16663; NoVLX: # %bb.0: # %entry 16664; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16665; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 16666; NoVLX-NEXT: kmovw %edi, %k1 16667; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16668; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16669; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16670; NoVLX-NEXT: kmovw %k0, %eax 16671; NoVLX-NEXT: movzwl %ax, %eax 16672; NoVLX-NEXT: vzeroupper 16673; NoVLX-NEXT: retq 16674entry: 16675 %0 = bitcast <2 x i64> %__a to <4 x i32> 16676 %load = load i32, i32* %__b 16677 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16678 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16679 %2 = icmp ult <4 x i32> %0, %1 16680 %3 = bitcast i8 %__u to <8 x i1> 16681 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16682 %4 = and <4 x i1> %extract.i, %2 16683 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16684 %6 = bitcast <64 x i1> %5 to i64 16685 ret i64 %6 16686} 16687 16688 16689define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16690; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: 16691; VLX: # %bb.0: # %entry 16692; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 16693; VLX-NEXT: kmovd %k0, %eax 16694; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16695; VLX-NEXT: vzeroupper 16696; VLX-NEXT: retq 16697; 16698; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: 16699; NoVLX: # %bb.0: # %entry 16700; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16701; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16702; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16703; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16704; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16705; NoVLX-NEXT: kmovw %k0, %eax 16706; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16707; NoVLX-NEXT: vzeroupper 16708; NoVLX-NEXT: retq 16709entry: 16710 %0 = bitcast <4 x i64> %__a to <8 x i32> 16711 %1 = bitcast <4 x i64> %__b to <8 x i32> 16712 %2 = icmp ult <8 x i32> %0, %1 16713 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16714 %4 = bitcast <16 x i1> %3 to i16 16715 ret i16 %4 16716} 16717 16718define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 16719; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: 16720; VLX: # %bb.0: # %entry 16721; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 16722; VLX-NEXT: kmovd %k0, %eax 16723; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16724; VLX-NEXT: vzeroupper 16725; VLX-NEXT: retq 16726; 16727; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: 16728; NoVLX: # %bb.0: # %entry 16729; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16730; 
NoVLX-NEXT: vmovdqa (%rdi), %ymm1 16731; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16732; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16733; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16734; NoVLX-NEXT: kmovw %k0, %eax 16735; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16736; NoVLX-NEXT: vzeroupper 16737; NoVLX-NEXT: retq 16738entry: 16739 %0 = bitcast <4 x i64> %__a to <8 x i32> 16740 %load = load <4 x i64>, <4 x i64>* %__b 16741 %1 = bitcast <4 x i64> %load to <8 x i32> 16742 %2 = icmp ult <8 x i32> %0, %1 16743 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16744 %4 = bitcast <16 x i1> %3 to i16 16745 ret i16 %4 16746} 16747 16748define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16749; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: 16750; VLX: # %bb.0: # %entry 16751; VLX-NEXT: kmovd %edi, %k1 16752; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 16753; VLX-NEXT: kmovd %k0, %eax 16754; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16755; VLX-NEXT: vzeroupper 16756; VLX-NEXT: retq 16757; 16758; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: 16759; NoVLX: # %bb.0: # %entry 16760; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16761; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16762; NoVLX-NEXT: kmovw %edi, %k1 16763; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16764; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16765; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16766; NoVLX-NEXT: kmovw %k0, %eax 16767; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16768; NoVLX-NEXT: vzeroupper 16769; NoVLX-NEXT: retq 16770entry: 16771 %0 = bitcast <4 x i64> %__a to <8 x i32> 16772 %1 = bitcast <4 x i64> %__b to <8 x i32> 16773 %2 = icmp ult <8 x i32> %0, %1 16774 %3 = bitcast i8 %__u to <8 x i1> 16775 %4 = and <8 x i1> %2, %3 16776 %5 = shufflevector <8 x i1> %4, <8 x 
i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16777 %6 = bitcast <16 x i1> %5 to i16 16778 ret i16 %6 16779} 16780 16781define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 16782; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: 16783; VLX: # %bb.0: # %entry 16784; VLX-NEXT: kmovd %edi, %k1 16785; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 16786; VLX-NEXT: kmovd %k0, %eax 16787; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16788; VLX-NEXT: vzeroupper 16789; VLX-NEXT: retq 16790; 16791; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: 16792; NoVLX: # %bb.0: # %entry 16793; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16794; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 16795; NoVLX-NEXT: kmovw %edi, %k1 16796; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16797; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16798; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16799; NoVLX-NEXT: kmovw %k0, %eax 16800; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16801; NoVLX-NEXT: vzeroupper 16802; NoVLX-NEXT: retq 16803entry: 16804 %0 = bitcast <4 x i64> %__a to <8 x i32> 16805 %load = load <4 x i64>, <4 x i64>* %__b 16806 %1 = bitcast <4 x i64> %load to <8 x i32> 16807 %2 = icmp ult <8 x i32> %0, %1 16808 %3 = bitcast i8 %__u to <8 x i1> 16809 %4 = and <8 x i1> %2, %3 16810 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16811 %6 = bitcast <16 x i1> %5 to i16 16812 ret i16 %6 16813} 16814 16815 16816define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 16817; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: 16818; VLX: # %bb.0: # %entry 16819; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 16820; VLX-NEXT: kmovd %k0, 
%eax 16821; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16822; VLX-NEXT: vzeroupper 16823; VLX-NEXT: retq 16824; 16825; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: 16826; NoVLX: # %bb.0: # %entry 16827; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16828; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 16829; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16830; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16831; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16832; NoVLX-NEXT: kmovw %k0, %eax 16833; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16834; NoVLX-NEXT: vzeroupper 16835; NoVLX-NEXT: retq 16836entry: 16837 %0 = bitcast <4 x i64> %__a to <8 x i32> 16838 %load = load i32, i32* %__b 16839 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16840 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16841 %2 = icmp ult <8 x i32> %0, %1 16842 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16843 %4 = bitcast <16 x i1> %3 to i16 16844 ret i16 %4 16845} 16846 16847define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 16848; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: 16849; VLX: # %bb.0: # %entry 16850; VLX-NEXT: kmovd %edi, %k1 16851; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 16852; VLX-NEXT: kmovd %k0, %eax 16853; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16854; VLX-NEXT: vzeroupper 16855; VLX-NEXT: retq 16856; 16857; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: 16858; NoVLX: # %bb.0: # %entry 16859; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16860; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 16861; NoVLX-NEXT: kmovw %edi, %k1 16862; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16863; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16864; NoVLX-NEXT: kshiftrw $8, 
%k0, %k0 16865; NoVLX-NEXT: kmovw %k0, %eax 16866; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16867; NoVLX-NEXT: vzeroupper 16868; NoVLX-NEXT: retq 16869entry: 16870 %0 = bitcast <4 x i64> %__a to <8 x i32> 16871 %load = load i32, i32* %__b 16872 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16873 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16874 %2 = icmp ult <8 x i32> %0, %1 16875 %3 = bitcast i8 %__u to <8 x i1> 16876 %4 = and <8 x i1> %3, %2 16877 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16878 %6 = bitcast <16 x i1> %5 to i16 16879 ret i16 %6 16880} 16881 16882 16883define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16884; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: 16885; VLX: # %bb.0: # %entry 16886; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 16887; VLX-NEXT: kmovd %k0, %eax 16888; VLX-NEXT: vzeroupper 16889; VLX-NEXT: retq 16890; 16891; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: 16892; NoVLX: # %bb.0: # %entry 16893; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16894; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16895; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16896; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16897; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16898; NoVLX-NEXT: kmovw %k0, %eax 16899; NoVLX-NEXT: vzeroupper 16900; NoVLX-NEXT: retq 16901entry: 16902 %0 = bitcast <4 x i64> %__a to <8 x i32> 16903 %1 = bitcast <4 x i64> %__b to <8 x i32> 16904 %2 = icmp ult <8 x i32> %0, %1 16905 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 
13, i32 14, i32 15> 16906 %4 = bitcast <32 x i1> %3 to i32 16907 ret i32 %4 16908} 16909 16910define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 16911; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: 16912; VLX: # %bb.0: # %entry 16913; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 16914; VLX-NEXT: kmovd %k0, %eax 16915; VLX-NEXT: vzeroupper 16916; VLX-NEXT: retq 16917; 16918; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: 16919; NoVLX: # %bb.0: # %entry 16920; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16921; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 16922; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16923; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16924; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16925; NoVLX-NEXT: kmovw %k0, %eax 16926; NoVLX-NEXT: vzeroupper 16927; NoVLX-NEXT: retq 16928entry: 16929 %0 = bitcast <4 x i64> %__a to <8 x i32> 16930 %load = load <4 x i64>, <4 x i64>* %__b 16931 %1 = bitcast <4 x i64> %load to <8 x i32> 16932 %2 = icmp ult <8 x i32> %0, %1 16933 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16934 %4 = bitcast <32 x i1> %3 to i32 16935 ret i32 %4 16936} 16937 16938define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16939; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: 16940; VLX: # %bb.0: # %entry 16941; VLX-NEXT: kmovd %edi, %k1 16942; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 16943; VLX-NEXT: kmovd %k0, %eax 16944; VLX-NEXT: vzeroupper 16945; VLX-NEXT: retq 16946; 16947; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: 16948; NoVLX: # %bb.0: # %entry 16949; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16950; NoVLX-NEXT: # kill: def $ymm0 killed 
$ymm0 def $zmm0 16951; NoVLX-NEXT: kmovw %edi, %k1 16952; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16953; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16954; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16955; NoVLX-NEXT: kmovw %k0, %eax 16956; NoVLX-NEXT: vzeroupper 16957; NoVLX-NEXT: retq 16958entry: 16959 %0 = bitcast <4 x i64> %__a to <8 x i32> 16960 %1 = bitcast <4 x i64> %__b to <8 x i32> 16961 %2 = icmp ult <8 x i32> %0, %1 16962 %3 = bitcast i8 %__u to <8 x i1> 16963 %4 = and <8 x i1> %2, %3 16964 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16965 %6 = bitcast <32 x i1> %5 to i32 16966 ret i32 %6 16967} 16968 16969define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 16970; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: 16971; VLX: # %bb.0: # %entry 16972; VLX-NEXT: kmovd %edi, %k1 16973; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 16974; VLX-NEXT: kmovd %k0, %eax 16975; VLX-NEXT: vzeroupper 16976; VLX-NEXT: retq 16977; 16978; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: 16979; NoVLX: # %bb.0: # %entry 16980; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16981; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 16982; NoVLX-NEXT: kmovw %edi, %k1 16983; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16984; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16985; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16986; NoVLX-NEXT: kmovw %k0, %eax 16987; NoVLX-NEXT: vzeroupper 16988; NoVLX-NEXT: retq 16989entry: 16990 %0 = bitcast <4 x i64> %__a to <8 x i32> 16991 %load = load <4 x i64>, <4 x i64>* %__b 16992 %1 = bitcast <4 x i64> %load to <8 x i32> 16993 %2 = icmp ult <8 x i32> %0, %1 16994 %3 = bitcast i8 %__u to <8 x i1> 16995 %4 = and <8 x i1> %2, %3 16996 
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16997 %6 = bitcast <32 x i1> %5 to i32 16998 ret i32 %6 16999} 17000 17001 17002define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 17003; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: 17004; VLX: # %bb.0: # %entry 17005; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 17006; VLX-NEXT: kmovd %k0, %eax 17007; VLX-NEXT: vzeroupper 17008; VLX-NEXT: retq 17009; 17010; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: 17011; NoVLX: # %bb.0: # %entry 17012; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17013; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 17014; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17015; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17016; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17017; NoVLX-NEXT: kmovw %k0, %eax 17018; NoVLX-NEXT: vzeroupper 17019; NoVLX-NEXT: retq 17020entry: 17021 %0 = bitcast <4 x i64> %__a to <8 x i32> 17022 %load = load i32, i32* %__b 17023 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 17024 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17025 %2 = icmp ult <8 x i32> %0, %1 17026 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17027 %4 = bitcast <32 x i1> %3 to i32 17028 ret i32 %4 17029} 17030 17031define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 17032; VLX-LABEL: 
test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: 17033; VLX: # %bb.0: # %entry 17034; VLX-NEXT: kmovd %edi, %k1 17035; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 17036; VLX-NEXT: kmovd %k0, %eax 17037; VLX-NEXT: vzeroupper 17038; VLX-NEXT: retq 17039; 17040; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: 17041; NoVLX: # %bb.0: # %entry 17042; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17043; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 17044; NoVLX-NEXT: kmovw %edi, %k1 17045; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17046; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17047; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17048; NoVLX-NEXT: kmovw %k0, %eax 17049; NoVLX-NEXT: vzeroupper 17050; NoVLX-NEXT: retq 17051entry: 17052 %0 = bitcast <4 x i64> %__a to <8 x i32> 17053 %load = load i32, i32* %__b 17054 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 17055 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17056 %2 = icmp ult <8 x i32> %0, %1 17057 %3 = bitcast i8 %__u to <8 x i1> 17058 %4 = and <8 x i1> %3, %2 17059 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17060 %6 = bitcast <32 x i1> %5 to i32 17061 ret i32 %6 17062} 17063 17064 17065define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 17066; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: 17067; VLX: # %bb.0: # %entry 17068; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 17069; VLX-NEXT: kmovq %k0, %rax 17070; VLX-NEXT: vzeroupper 17071; VLX-NEXT: retq 17072; 17073; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: 17074; NoVLX: # %bb.0: # %entry 17075; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 17076; NoVLX-NEXT: # kill: def 
$ymm0 killed $ymm0 def $zmm0 17077; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17078; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17079; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17080; NoVLX-NEXT: kmovw %k0, %eax 17081; NoVLX-NEXT: movzwl %ax, %eax 17082; NoVLX-NEXT: vzeroupper 17083; NoVLX-NEXT: retq 17084entry: 17085 %0 = bitcast <4 x i64> %__a to <8 x i32> 17086 %1 = bitcast <4 x i64> %__b to <8 x i32> 17087 %2 = icmp ult <8 x i32> %0, %1 17088 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17089 %4 = bitcast <64 x i1> %3 to i64 17090 ret i64 %4 17091} 17092 17093define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 17094; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: 17095; VLX: # %bb.0: # %entry 17096; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 17097; VLX-NEXT: kmovq %k0, %rax 17098; VLX-NEXT: vzeroupper 17099; VLX-NEXT: retq 17100; 17101; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: 17102; NoVLX: # %bb.0: # %entry 17103; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17104; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 17105; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17106; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17107; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17108; NoVLX-NEXT: kmovw %k0, %eax 17109; NoVLX-NEXT: movzwl %ax, %eax 17110; NoVLX-NEXT: vzeroupper 17111; NoVLX-NEXT: retq 17112entry: 17113 %0 = bitcast <4 x i64> %__a to <8 x i32> 17114 %load = load <4 x i64>, <4 x i64>* %__b 17115 %1 = bitcast <4 x i64> %load to <8 x i32> 17116 %2 
= icmp ult <8 x i32> %0, %1 17117 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17118 %4 = bitcast <64 x i1> %3 to i64 17119 ret i64 %4 17120} 17121 17122define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 17123; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: 17124; VLX: # %bb.0: # %entry 17125; VLX-NEXT: kmovd %edi, %k1 17126; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 17127; VLX-NEXT: kmovq %k0, %rax 17128; VLX-NEXT: vzeroupper 17129; VLX-NEXT: retq 17130; 17131; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: 17132; NoVLX: # %bb.0: # %entry 17133; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 17134; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17135; NoVLX-NEXT: kmovw %edi, %k1 17136; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17137; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17138; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17139; NoVLX-NEXT: kmovw %k0, %eax 17140; NoVLX-NEXT: movzwl %ax, %eax 17141; NoVLX-NEXT: vzeroupper 17142; NoVLX-NEXT: retq 17143entry: 17144 %0 = bitcast <4 x i64> %__a to <8 x i32> 17145 %1 = bitcast <4 x i64> %__b to <8 x i32> 17146 %2 = icmp ult <8 x i32> %0, %1 17147 %3 = bitcast i8 %__u to <8 x i1> 17148 %4 = and <8 x i1> %2, %3 17149 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17150 %6 = bitcast <64 x i1> %5 to i64 17151 ret i64 %6 17152} 17153 17154define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 17155; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: 17156; VLX: # %bb.0: # %entry 17157; VLX-NEXT: kmovd %edi, %k1 17158; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 17159; VLX-NEXT: kmovq %k0, %rax 17160; VLX-NEXT: vzeroupper 17161; VLX-NEXT: retq 17162; 17163; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: 17164; NoVLX: # %bb.0: # %entry 17165; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17166; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 17167; NoVLX-NEXT: kmovw %edi, %k1 17168; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17169; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17170; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17171; NoVLX-NEXT: kmovw %k0, %eax 17172; NoVLX-NEXT: movzwl %ax, %eax 17173; NoVLX-NEXT: vzeroupper 17174; NoVLX-NEXT: retq 17175entry: 17176 %0 = bitcast <4 x i64> %__a to <8 x i32> 17177 %load = load <4 x i64>, <4 x i64>* %__b 17178 %1 = bitcast <4 x i64> %load to <8 x i32> 17179 %2 = icmp ult <8 x i32> %0, %1 17180 %3 = bitcast i8 %__u to <8 x i1> 17181 %4 = and <8 x i1> %2, %3 17182 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, 
i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17183 %6 = bitcast <64 x i1> %5 to i64 17184 ret i64 %6 17185} 17186 17187 17188define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { 17189; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: 17190; VLX: # %bb.0: # %entry 17191; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 17192; VLX-NEXT: kmovq %k0, %rax 17193; VLX-NEXT: vzeroupper 17194; VLX-NEXT: retq 17195; 17196; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: 17197; NoVLX: # %bb.0: # %entry 17198; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17199; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 17200; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17201; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17202; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17203; NoVLX-NEXT: kmovw %k0, %eax 17204; NoVLX-NEXT: movzwl %ax, %eax 17205; NoVLX-NEXT: vzeroupper 17206; NoVLX-NEXT: retq 17207entry: 17208 %0 = bitcast <4 x i64> %__a to <8 x i32> 17209 %load = load i32, i32* %__b 17210 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 17211 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17212 %2 = icmp ult <8 x i32> %0, %1 17213 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17214 %4 = bitcast <64 x i1> %3 to i64 17215 ret i64 %4 17216} 17217 17218define zeroext i64 
@test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { 17219; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: 17220; VLX: # %bb.0: # %entry 17221; VLX-NEXT: kmovd %edi, %k1 17222; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 17223; VLX-NEXT: kmovq %k0, %rax 17224; VLX-NEXT: vzeroupper 17225; VLX-NEXT: retq 17226; 17227; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: 17228; NoVLX: # %bb.0: # %entry 17229; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 17230; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 17231; NoVLX-NEXT: kmovw %edi, %k1 17232; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17233; NoVLX-NEXT: kshiftlw $8, %k0, %k0 17234; NoVLX-NEXT: kshiftrw $8, %k0, %k0 17235; NoVLX-NEXT: kmovw %k0, %eax 17236; NoVLX-NEXT: movzwl %ax, %eax 17237; NoVLX-NEXT: vzeroupper 17238; NoVLX-NEXT: retq 17239entry: 17240 %0 = bitcast <4 x i64> %__a to <8 x i32> 17241 %load = load i32, i32* %__b 17242 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 17243 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17244 %2 = icmp ult <8 x i32> %0, %1 17245 %3 = bitcast i8 %__u to <8 x i1> 17246 %4 = and <8 x i1> %3, %2 17247 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 17248 %6 = bitcast <64 x i1> %5 to i64 17249 ret i64 %6 17250} 17251 17252 17253define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) 
local_unnamed_addr { 17254; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: 17255; VLX: # %bb.0: # %entry 17256; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17257; VLX-NEXT: kmovd %k0, %eax 17258; VLX-NEXT: vzeroupper 17259; VLX-NEXT: retq 17260; 17261; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: 17262; NoVLX: # %bb.0: # %entry 17263; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17264; NoVLX-NEXT: kmovw %k0, %eax 17265; NoVLX-NEXT: vzeroupper 17266; NoVLX-NEXT: retq 17267entry: 17268 %0 = bitcast <8 x i64> %__a to <16 x i32> 17269 %1 = bitcast <8 x i64> %__b to <16 x i32> 17270 %2 = icmp ult <16 x i32> %0, %1 17271 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17272 %4 = bitcast <32 x i1> %3 to i32 17273 ret i32 %4 17274} 17275 17276define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 17277; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: 17278; VLX: # %bb.0: # %entry 17279; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17280; VLX-NEXT: kmovd %k0, %eax 17281; VLX-NEXT: vzeroupper 17282; VLX-NEXT: retq 17283; 17284; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: 17285; NoVLX: # %bb.0: # %entry 17286; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17287; NoVLX-NEXT: kmovw %k0, %eax 17288; NoVLX-NEXT: vzeroupper 17289; NoVLX-NEXT: retq 17290entry: 17291 %0 = bitcast <8 x i64> %__a to <16 x i32> 17292 %load = load <8 x i64>, <8 x i64>* %__b 17293 %1 = bitcast <8 x i64> %load to <16 x i32> 17294 %2 = icmp ult <16 x i32> %0, %1 17295 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 
22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17296 %4 = bitcast <32 x i1> %3 to i32 17297 ret i32 %4 17298} 17299 17300define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 17301; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: 17302; VLX: # %bb.0: # %entry 17303; VLX-NEXT: kmovd %edi, %k1 17304; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17305; VLX-NEXT: kmovd %k0, %eax 17306; VLX-NEXT: vzeroupper 17307; VLX-NEXT: retq 17308; 17309; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: 17310; NoVLX: # %bb.0: # %entry 17311; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17312; NoVLX-NEXT: kmovw %k0, %eax 17313; NoVLX-NEXT: andl %edi, %eax 17314; NoVLX-NEXT: vzeroupper 17315; NoVLX-NEXT: retq 17316entry: 17317 %0 = bitcast <8 x i64> %__a to <16 x i32> 17318 %1 = bitcast <8 x i64> %__b to <16 x i32> 17319 %2 = icmp ult <16 x i32> %0, %1 17320 %3 = bitcast i16 %__u to <16 x i1> 17321 %4 = and <16 x i1> %2, %3 17322 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17323 %6 = bitcast <32 x i1> %5 to i32 17324 ret i32 %6 17325} 17326 17327define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 17328; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: 17329; VLX: # %bb.0: # %entry 17330; VLX-NEXT: kmovd %edi, %k1 17331; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} 17332; VLX-NEXT: kmovd %k0, %eax 17333; VLX-NEXT: vzeroupper 17334; VLX-NEXT: retq 17335; 17336; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: 17337; NoVLX: # %bb.0: # %entry 17338; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 17339; NoVLX-NEXT: kmovw %k0, 
%eax 17340; NoVLX-NEXT: andl %edi, %eax 17341; NoVLX-NEXT: vzeroupper 17342; NoVLX-NEXT: retq 17343entry: 17344 %0 = bitcast <8 x i64> %__a to <16 x i32> 17345 %load = load <8 x i64>, <8 x i64>* %__b 17346 %1 = bitcast <8 x i64> %load to <16 x i32> 17347 %2 = icmp ult <16 x i32> %0, %1 17348 %3 = bitcast i16 %__u to <16 x i1> 17349 %4 = and <16 x i1> %2, %3 17350 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17351 %6 = bitcast <32 x i1> %5 to i32 17352 ret i32 %6 17353} 17354 17355 17356define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 17357; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: 17358; VLX: # %bb.0: # %entry 17359; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17360; VLX-NEXT: kmovd %k0, %eax 17361; VLX-NEXT: vzeroupper 17362; VLX-NEXT: retq 17363; 17364; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: 17365; NoVLX: # %bb.0: # %entry 17366; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17367; NoVLX-NEXT: kmovw %k0, %eax 17368; NoVLX-NEXT: vzeroupper 17369; NoVLX-NEXT: retq 17370entry: 17371 %0 = bitcast <8 x i64> %__a to <16 x i32> 17372 %load = load i32, i32* %__b 17373 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17374 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17375 %2 = icmp ult <16 x i32> %0, %1 17376 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, 
i32 27, i32 28, i32 29, i32 30, i32 31> 17377 %4 = bitcast <32 x i1> %3 to i32 17378 ret i32 %4 17379} 17380 17381define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 17382; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: 17383; VLX: # %bb.0: # %entry 17384; VLX-NEXT: kmovd %edi, %k1 17385; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 17386; VLX-NEXT: kmovd %k0, %eax 17387; VLX-NEXT: vzeroupper 17388; VLX-NEXT: retq 17389; 17390; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: 17391; NoVLX: # %bb.0: # %entry 17392; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 17393; NoVLX-NEXT: kmovw %k0, %eax 17394; NoVLX-NEXT: andl %edi, %eax 17395; NoVLX-NEXT: vzeroupper 17396; NoVLX-NEXT: retq 17397entry: 17398 %0 = bitcast <8 x i64> %__a to <16 x i32> 17399 %load = load i32, i32* %__b 17400 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17401 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17402 %2 = icmp ult <16 x i32> %0, %1 17403 %3 = bitcast i16 %__u to <16 x i1> 17404 %4 = and <16 x i1> %3, %2 17405 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17406 %6 = bitcast <32 x i1> %5 to i32 17407 ret i32 %6 17408} 17409 17410 17411define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 17412; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: 17413; VLX: # %bb.0: # %entry 17414; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17415; VLX-NEXT: kmovq %k0, %rax 17416; VLX-NEXT: vzeroupper 17417; VLX-NEXT: retq 17418; 17419; NoVLX-LABEL: 
test_vpcmpultd_v16i1_v64i1_mask: 17420; NoVLX: # %bb.0: # %entry 17421; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17422; NoVLX-NEXT: kmovw %k0, %eax 17423; NoVLX-NEXT: movzwl %ax, %eax 17424; NoVLX-NEXT: vzeroupper 17425; NoVLX-NEXT: retq 17426entry: 17427 %0 = bitcast <8 x i64> %__a to <16 x i32> 17428 %1 = bitcast <8 x i64> %__b to <16 x i32> 17429 %2 = icmp ult <16 x i32> %0, %1 17430 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17431 %4 = bitcast <64 x i1> %3 to i64 17432 ret i64 %4 17433} 17434 17435define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 17436; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: 17437; VLX: # %bb.0: # %entry 17438; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17439; VLX-NEXT: kmovq %k0, %rax 17440; VLX-NEXT: vzeroupper 17441; VLX-NEXT: retq 17442; 17443; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: 17444; NoVLX: # %bb.0: # %entry 17445; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17446; NoVLX-NEXT: kmovw %k0, %eax 17447; NoVLX-NEXT: movzwl %ax, %eax 17448; NoVLX-NEXT: vzeroupper 17449; NoVLX-NEXT: retq 17450entry: 17451 %0 = bitcast <8 x i64> %__a to <16 x i32> 17452 %load = load <8 x i64>, <8 x i64>* %__b 17453 %1 = bitcast <8 x i64> %load to <16 x i32> 17454 %2 = icmp ult <16 x i32> %0, %1 17455 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 
24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17456 %4 = bitcast <64 x i1> %3 to i64 17457 ret i64 %4 17458} 17459 17460define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 17461; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: 17462; VLX: # %bb.0: # %entry 17463; VLX-NEXT: kmovd %edi, %k1 17464; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17465; VLX-NEXT: kmovq %k0, %rax 17466; VLX-NEXT: vzeroupper 17467; VLX-NEXT: retq 17468; 17469; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: 17470; NoVLX: # %bb.0: # %entry 17471; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17472; NoVLX-NEXT: kmovw %k0, %eax 17473; NoVLX-NEXT: andl %edi, %eax 17474; NoVLX-NEXT: vzeroupper 17475; NoVLX-NEXT: retq 17476entry: 17477 %0 = bitcast <8 x i64> %__a to <16 x i32> 17478 %1 = bitcast <8 x i64> %__b to <16 x i32> 17479 %2 = icmp ult <16 x i32> %0, %1 17480 %3 = bitcast i16 %__u to <16 x i1> 17481 %4 = and <16 x i1> %2, %3 17482 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17483 %6 = bitcast <64 x i1> %5 to i64 17484 ret i64 %6 17485} 17486 17487define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 17488; VLX-LABEL: 
test_masked_vpcmpultd_v16i1_v64i1_mask_mem: 17489; VLX: # %bb.0: # %entry 17490; VLX-NEXT: kmovd %edi, %k1 17491; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} 17492; VLX-NEXT: kmovq %k0, %rax 17493; VLX-NEXT: vzeroupper 17494; VLX-NEXT: retq 17495; 17496; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: 17497; NoVLX: # %bb.0: # %entry 17498; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 17499; NoVLX-NEXT: kmovw %k0, %eax 17500; NoVLX-NEXT: andl %edi, %eax 17501; NoVLX-NEXT: vzeroupper 17502; NoVLX-NEXT: retq 17503entry: 17504 %0 = bitcast <8 x i64> %__a to <16 x i32> 17505 %load = load <8 x i64>, <8 x i64>* %__b 17506 %1 = bitcast <8 x i64> %load to <16 x i32> 17507 %2 = icmp ult <16 x i32> %0, %1 17508 %3 = bitcast i16 %__u to <16 x i1> 17509 %4 = and <16 x i1> %2, %3 17510 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17511 %6 = bitcast <64 x i1> %5 to i64 17512 ret i64 %6 17513} 17514 17515 17516define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { 17517; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: 17518; VLX: # %bb.0: # %entry 17519; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17520; VLX-NEXT: kmovq %k0, %rax 17521; VLX-NEXT: vzeroupper 17522; VLX-NEXT: retq 17523; 17524; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: 17525; NoVLX: # %bb.0: # %entry 17526; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17527; NoVLX-NEXT: kmovw %k0, %eax 17528; NoVLX-NEXT: movzwl %ax, %eax 17529; NoVLX-NEXT: vzeroupper 17530; NoVLX-NEXT: retq 17531entry: 
17532 %0 = bitcast <8 x i64> %__a to <16 x i32> 17533 %load = load i32, i32* %__b 17534 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17535 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17536 %2 = icmp ult <16 x i32> %0, %1 17537 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17538 %4 = bitcast <64 x i1> %3 to i64 17539 ret i64 %4 17540} 17541 17542define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { 17543; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: 17544; VLX: # %bb.0: # %entry 17545; VLX-NEXT: kmovd %edi, %k1 17546; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 17547; VLX-NEXT: kmovq %k0, %rax 17548; VLX-NEXT: vzeroupper 17549; VLX-NEXT: retq 17550; 17551; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: 17552; NoVLX: # %bb.0: # %entry 17553; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 17554; NoVLX-NEXT: kmovw %k0, %eax 17555; NoVLX-NEXT: andl %edi, %eax 17556; NoVLX-NEXT: vzeroupper 17557; NoVLX-NEXT: retq 17558entry: 17559 %0 = bitcast <8 x i64> %__a to <16 x i32> 17560 %load = load i32, i32* %__b 17561 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17562 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17563 %2 = icmp 
ult <16 x i32> %0, %1 17564 %3 = bitcast i16 %__u to <16 x i1> 17565 %4 = and <16 x i1> %3, %2 17566 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17567 %6 = bitcast <64 x i1> %5 to i64 17568 ret i64 %6 17569} 17570 17571 17572define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17573; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: 17574; VLX: # %bb.0: # %entry 17575; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17576; VLX-NEXT: kmovb %k0, %eax 17577; VLX-NEXT: retq 17578; 17579; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: 17580; NoVLX: # %bb.0: # %entry 17581; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17582; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17583; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17584; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17585; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17586; NoVLX-NEXT: kmovw %k0, %eax 17587; NoVLX-NEXT: andl $3, %eax 17588; NoVLX-NEXT: vzeroupper 17589; NoVLX-NEXT: retq 17590entry: 17591 %0 = bitcast <2 x i64> %__a to <2 x i64> 17592 %1 = bitcast <2 x i64> %__b to <2 x i64> 17593 %2 = icmp ult <2 x i64> %0, %1 17594 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17595 %4 = bitcast <4 x i1> %3 to i4 17596 ret i4 %4 17597} 17598 17599define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 17600; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: 17601; VLX: # %bb.0: # %entry 17602; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17603; 
VLX-NEXT: kmovb %k0, %eax 17604; VLX-NEXT: retq 17605; 17606; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: 17607; NoVLX: # %bb.0: # %entry 17608; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17609; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17610; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17611; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17612; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17613; NoVLX-NEXT: kmovw %k0, %eax 17614; NoVLX-NEXT: andl $3, %eax 17615; NoVLX-NEXT: vzeroupper 17616; NoVLX-NEXT: retq 17617entry: 17618 %0 = bitcast <2 x i64> %__a to <2 x i64> 17619 %load = load <2 x i64>, <2 x i64>* %__b 17620 %1 = bitcast <2 x i64> %load to <2 x i64> 17621 %2 = icmp ult <2 x i64> %0, %1 17622 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17623 %4 = bitcast <4 x i1> %3 to i4 17624 ret i4 %4 17625} 17626 17627define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17628; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: 17629; VLX: # %bb.0: # %entry 17630; VLX-NEXT: kmovd %edi, %k1 17631; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17632; VLX-NEXT: kmovb %k0, %eax 17633; VLX-NEXT: retq 17634; 17635; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: 17636; NoVLX: # %bb.0: # %entry 17637; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17638; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17639; NoVLX-NEXT: kmovw %edi, %k1 17640; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17641; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17642; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17643; NoVLX-NEXT: kmovw %k0, %eax 17644; NoVLX-NEXT: andl $3, %eax 17645; NoVLX-NEXT: vzeroupper 17646; NoVLX-NEXT: retq 17647entry: 17648 %0 = bitcast <2 x i64> %__a to <2 x i64> 17649 %1 = bitcast <2 x i64> %__b to <2 x i64> 17650 %2 = icmp ult <2 x i64> %0, %1 17651 %3 = bitcast i8 %__u to <8 x i1> 17652 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17653 %4 
= and <2 x i1> %2, %extract.i 17654 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17655 %6 = bitcast <4 x i1> %5 to i4 17656 ret i4 %6 17657} 17658 17659define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 17660; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: 17661; VLX: # %bb.0: # %entry 17662; VLX-NEXT: kmovd %edi, %k1 17663; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17664; VLX-NEXT: kmovb %k0, %eax 17665; VLX-NEXT: retq 17666; 17667; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: 17668; NoVLX: # %bb.0: # %entry 17669; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17670; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17671; NoVLX-NEXT: kmovw %edi, %k1 17672; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17673; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17674; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17675; NoVLX-NEXT: kmovw %k0, %eax 17676; NoVLX-NEXT: andl $3, %eax 17677; NoVLX-NEXT: vzeroupper 17678; NoVLX-NEXT: retq 17679entry: 17680 %0 = bitcast <2 x i64> %__a to <2 x i64> 17681 %load = load <2 x i64>, <2 x i64>* %__b 17682 %1 = bitcast <2 x i64> %load to <2 x i64> 17683 %2 = icmp ult <2 x i64> %0, %1 17684 %3 = bitcast i8 %__u to <8 x i1> 17685 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17686 %4 = and <2 x i1> %2, %extract.i 17687 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17688 %6 = bitcast <4 x i1> %5 to i4 17689 ret i4 %6 17690} 17691 17692 17693define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 17694; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: 17695; VLX: # %bb.0: # %entry 17696; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17697; VLX-NEXT: kmovb %k0, %eax 17698; VLX-NEXT: retq 17699; 17700; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: 17701; NoVLX: # %bb.0: # %entry 17702; 
NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17703; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 17704; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17705; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17706; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17707; NoVLX-NEXT: kmovw %k0, %eax 17708; NoVLX-NEXT: andl $3, %eax 17709; NoVLX-NEXT: vzeroupper 17710; NoVLX-NEXT: retq 17711entry: 17712 %0 = bitcast <2 x i64> %__a to <2 x i64> 17713 %load = load i64, i64* %__b 17714 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17715 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17716 %2 = icmp ult <2 x i64> %0, %1 17717 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17718 %4 = bitcast <4 x i1> %3 to i4 17719 ret i4 %4 17720} 17721 17722define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 17723; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: 17724; VLX: # %bb.0: # %entry 17725; VLX-NEXT: kmovd %edi, %k1 17726; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17727; VLX-NEXT: kmovb %k0, %eax 17728; VLX-NEXT: retq 17729; 17730; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: 17731; NoVLX: # %bb.0: # %entry 17732; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17733; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 17734; NoVLX-NEXT: kmovw %edi, %k1 17735; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17736; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17737; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17738; NoVLX-NEXT: kmovw %k0, %eax 17739; NoVLX-NEXT: andl $3, %eax 17740; NoVLX-NEXT: vzeroupper 17741; NoVLX-NEXT: retq 17742entry: 17743 %0 = bitcast <2 x i64> %__a to <2 x i64> 17744 %load = load i64, i64* %__b 17745 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17746 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17747 %2 = icmp ult <2 x i64> %0, %1 17748 %3 = bitcast i8 %__u to <8 x i1> 17749 %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17750 %4 = and <2 x i1> %extract.i, %2 17751 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17752 %6 = bitcast <4 x i1> %5 to i4 17753 ret i4 %6 17754} 17755 17756 17757define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17758; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: 17759; VLX: # %bb.0: # %entry 17760; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17761; VLX-NEXT: kmovd %k0, %eax 17762; VLX-NEXT: # kill: def $al killed $al killed $eax 17763; VLX-NEXT: retq 17764; 17765; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: 17766; NoVLX: # %bb.0: # %entry 17767; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17768; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17769; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17770; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17771; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17772; NoVLX-NEXT: kmovw %k0, %eax 17773; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17774; NoVLX-NEXT: vzeroupper 17775; NoVLX-NEXT: retq 17776entry: 17777 %0 = bitcast <2 x i64> %__a to <2 x i64> 17778 %1 = bitcast <2 x i64> %__b to <2 x i64> 17779 %2 = icmp ult <2 x i64> %0, %1 17780 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17781 %4 = bitcast <8 x i1> %3 to i8 17782 ret i8 %4 17783} 17784 17785define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 17786; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: 17787; VLX: # %bb.0: # %entry 17788; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17789; VLX-NEXT: kmovd %k0, %eax 17790; VLX-NEXT: # kill: def $al killed $al killed $eax 17791; VLX-NEXT: retq 17792; 17793; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: 17794; NoVLX: # %bb.0: # %entry 17795; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17796; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 
17797; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17798; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17799; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17800; NoVLX-NEXT: kmovw %k0, %eax 17801; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17802; NoVLX-NEXT: vzeroupper 17803; NoVLX-NEXT: retq 17804entry: 17805 %0 = bitcast <2 x i64> %__a to <2 x i64> 17806 %load = load <2 x i64>, <2 x i64>* %__b 17807 %1 = bitcast <2 x i64> %load to <2 x i64> 17808 %2 = icmp ult <2 x i64> %0, %1 17809 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17810 %4 = bitcast <8 x i1> %3 to i8 17811 ret i8 %4 17812} 17813 17814define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17815; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: 17816; VLX: # %bb.0: # %entry 17817; VLX-NEXT: kmovd %edi, %k1 17818; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17819; VLX-NEXT: kmovd %k0, %eax 17820; VLX-NEXT: # kill: def $al killed $al killed $eax 17821; VLX-NEXT: retq 17822; 17823; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: 17824; NoVLX: # %bb.0: # %entry 17825; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17826; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17827; NoVLX-NEXT: kmovw %edi, %k1 17828; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17829; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17830; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17831; NoVLX-NEXT: kmovw %k0, %eax 17832; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17833; NoVLX-NEXT: vzeroupper 17834; NoVLX-NEXT: retq 17835entry: 17836 %0 = bitcast <2 x i64> %__a to <2 x i64> 17837 %1 = bitcast <2 x i64> %__b to <2 x i64> 17838 %2 = icmp ult <2 x i64> %0, %1 17839 %3 = bitcast i8 %__u to <8 x i1> 17840 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17841 %4 = and <2 x i1> %2, %extract.i 17842 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17843 %6 = bitcast <8 x i1> %5 to i8 17844 ret i8 %6 17845} 17846 17847define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 17848; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: 17849; VLX: # %bb.0: # %entry 17850; VLX-NEXT: kmovd %edi, %k1 17851; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17852; VLX-NEXT: kmovd %k0, %eax 17853; VLX-NEXT: # kill: def $al killed $al killed $eax 17854; VLX-NEXT: retq 17855; 17856; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: 17857; NoVLX: # %bb.0: # %entry 17858; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17859; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17860; NoVLX-NEXT: kmovw %edi, %k1 17861; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17862; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17863; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17864; NoVLX-NEXT: kmovw %k0, %eax 17865; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17866; NoVLX-NEXT: vzeroupper 17867; NoVLX-NEXT: retq 17868entry: 17869 %0 = bitcast <2 x i64> %__a to <2 x i64> 17870 %load = load <2 x i64>, <2 x i64>* %__b 17871 %1 = bitcast <2 x i64> %load to <2 x i64> 17872 %2 = icmp ult <2 x i64> %0, %1 17873 %3 = bitcast i8 %__u to <8 x i1> 17874 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17875 %4 = and <2 x i1> %2, %extract.i 17876 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17877 %6 = bitcast <8 x i1> %5 to i8 17878 ret i8 %6 17879} 17880 17881 17882define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 17883; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: 17884; VLX: # %bb.0: # %entry 17885; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17886; VLX-NEXT: kmovd %k0, %eax 17887; VLX-NEXT: # kill: def $al killed $al killed $eax 17888; VLX-NEXT: retq 17889; 17890; 
NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: 17891; NoVLX: # %bb.0: # %entry 17892; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17893; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 17894; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17895; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17896; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17897; NoVLX-NEXT: kmovw %k0, %eax 17898; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17899; NoVLX-NEXT: vzeroupper 17900; NoVLX-NEXT: retq 17901entry: 17902 %0 = bitcast <2 x i64> %__a to <2 x i64> 17903 %load = load i64, i64* %__b 17904 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17905 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17906 %2 = icmp ult <2 x i64> %0, %1 17907 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17908 %4 = bitcast <8 x i1> %3 to i8 17909 ret i8 %4 17910} 17911 17912define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 17913; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: 17914; VLX: # %bb.0: # %entry 17915; VLX-NEXT: kmovd %edi, %k1 17916; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17917; VLX-NEXT: kmovd %k0, %eax 17918; VLX-NEXT: # kill: def $al killed $al killed $eax 17919; VLX-NEXT: retq 17920; 17921; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: 17922; NoVLX: # %bb.0: # %entry 17923; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17924; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 17925; NoVLX-NEXT: kmovw %edi, %k1 17926; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17927; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17928; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17929; NoVLX-NEXT: kmovw %k0, %eax 17930; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17931; NoVLX-NEXT: vzeroupper 17932; NoVLX-NEXT: retq 17933entry: 17934 %0 = bitcast <2 x i64> %__a to <2 x i64> 17935 %load = load i64, i64* %__b 17936 %vec = 
insertelement <2 x i64> undef, i64 %load, i32 0 17937 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17938 %2 = icmp ult <2 x i64> %0, %1 17939 %3 = bitcast i8 %__u to <8 x i1> 17940 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17941 %4 = and <2 x i1> %extract.i, %2 17942 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17943 %6 = bitcast <8 x i1> %5 to i8 17944 ret i8 %6 17945} 17946 17947 17948define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17949; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: 17950; VLX: # %bb.0: # %entry 17951; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17952; VLX-NEXT: kmovd %k0, %eax 17953; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17954; VLX-NEXT: retq 17955; 17956; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: 17957; NoVLX: # %bb.0: # %entry 17958; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17959; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17960; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17961; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17962; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17963; NoVLX-NEXT: kmovw %k0, %eax 17964; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17965; NoVLX-NEXT: vzeroupper 17966; NoVLX-NEXT: retq 17967entry: 17968 %0 = bitcast <2 x i64> %__a to <2 x i64> 17969 %1 = bitcast <2 x i64> %__b to <2 x i64> 17970 %2 = icmp ult <2 x i64> %0, %1 17971 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17972 %4 = bitcast <16 x i1> %3 to i16 17973 ret i16 %4 17974} 17975 17976define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 17977; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: 17978; VLX: # %bb.0: # %entry 17979; VLX-NEXT: vpcmpltuq 
(%rdi), %xmm0, %k0 17980; VLX-NEXT: kmovd %k0, %eax 17981; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17982; VLX-NEXT: retq 17983; 17984; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: 17985; NoVLX: # %bb.0: # %entry 17986; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17987; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17988; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17989; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17990; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17991; NoVLX-NEXT: kmovw %k0, %eax 17992; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17993; NoVLX-NEXT: vzeroupper 17994; NoVLX-NEXT: retq 17995entry: 17996 %0 = bitcast <2 x i64> %__a to <2 x i64> 17997 %load = load <2 x i64>, <2 x i64>* %__b 17998 %1 = bitcast <2 x i64> %load to <2 x i64> 17999 %2 = icmp ult <2 x i64> %0, %1 18000 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18001 %4 = bitcast <16 x i1> %3 to i16 18002 ret i16 %4 18003} 18004 18005define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18006; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: 18007; VLX: # %bb.0: # %entry 18008; VLX-NEXT: kmovd %edi, %k1 18009; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 18010; VLX-NEXT: kmovd %k0, %eax 18011; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18012; VLX-NEXT: retq 18013; 18014; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: 18015; NoVLX: # %bb.0: # %entry 18016; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18017; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18018; NoVLX-NEXT: kmovw %edi, %k1 18019; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18020; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18021; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18022; NoVLX-NEXT: kmovw %k0, %eax 18023; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18024; NoVLX-NEXT: vzeroupper 18025; NoVLX-NEXT: 
retq 18026entry: 18027 %0 = bitcast <2 x i64> %__a to <2 x i64> 18028 %1 = bitcast <2 x i64> %__b to <2 x i64> 18029 %2 = icmp ult <2 x i64> %0, %1 18030 %3 = bitcast i8 %__u to <8 x i1> 18031 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18032 %4 = and <2 x i1> %2, %extract.i 18033 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18034 %6 = bitcast <16 x i1> %5 to i16 18035 ret i16 %6 18036} 18037 18038define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 18039; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: 18040; VLX: # %bb.0: # %entry 18041; VLX-NEXT: kmovd %edi, %k1 18042; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 18043; VLX-NEXT: kmovd %k0, %eax 18044; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18045; VLX-NEXT: retq 18046; 18047; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: 18048; NoVLX: # %bb.0: # %entry 18049; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18050; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 18051; NoVLX-NEXT: kmovw %edi, %k1 18052; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18053; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18054; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18055; NoVLX-NEXT: kmovw %k0, %eax 18056; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18057; NoVLX-NEXT: vzeroupper 18058; NoVLX-NEXT: retq 18059entry: 18060 %0 = bitcast <2 x i64> %__a to <2 x i64> 18061 %load = load <2 x i64>, <2 x i64>* %__b 18062 %1 = bitcast <2 x i64> %load to <2 x i64> 18063 %2 = icmp ult <2 x i64> %0, %1 18064 %3 = bitcast i8 %__u to <8 x i1> 18065 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18066 %4 = and <2 x i1> %2, %extract.i 18067 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18068 %6 = bitcast <16 x i1> %5 to i16 18069 ret i16 %6 18070} 18071 18072 18073define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 18074; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: 18075; VLX: # %bb.0: # %entry 18076; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 18077; VLX-NEXT: kmovd %k0, %eax 18078; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18079; VLX-NEXT: retq 18080; 18081; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: 18082; NoVLX: # %bb.0: # %entry 18083; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18084; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 18085; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18086; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18087; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18088; NoVLX-NEXT: kmovw %k0, %eax 18089; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18090; NoVLX-NEXT: vzeroupper 18091; NoVLX-NEXT: retq 18092entry: 18093 %0 = bitcast <2 x i64> %__a to <2 x i64> 18094 %load = load i64, i64* %__b 18095 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18096 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18097 %2 = icmp ult <2 x i64> %0, %1 18098 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18099 %4 = bitcast <16 x i1> %3 to i16 18100 ret i16 %4 18101} 18102 18103define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 18104; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: 18105; VLX: # %bb.0: # %entry 18106; VLX-NEXT: kmovd %edi, %k1 18107; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 18108; VLX-NEXT: kmovd %k0, %eax 18109; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18110; VLX-NEXT: retq 18111; 18112; NoVLX-LABEL: 
test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: 18113; NoVLX: # %bb.0: # %entry 18114; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18115; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 18116; NoVLX-NEXT: kmovw %edi, %k1 18117; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18118; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18119; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18120; NoVLX-NEXT: kmovw %k0, %eax 18121; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18122; NoVLX-NEXT: vzeroupper 18123; NoVLX-NEXT: retq 18124entry: 18125 %0 = bitcast <2 x i64> %__a to <2 x i64> 18126 %load = load i64, i64* %__b 18127 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18128 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18129 %2 = icmp ult <2 x i64> %0, %1 18130 %3 = bitcast i8 %__u to <8 x i1> 18131 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18132 %4 = and <2 x i1> %extract.i, %2 18133 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18134 %6 = bitcast <16 x i1> %5 to i16 18135 ret i16 %6 18136} 18137 18138 18139define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18140; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: 18141; VLX: # %bb.0: # %entry 18142; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 18143; VLX-NEXT: kmovd %k0, %eax 18144; VLX-NEXT: retq 18145; 18146; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: 18147; NoVLX: # %bb.0: # %entry 18148; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18149; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18150; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18151; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18152; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18153; NoVLX-NEXT: kmovw %k0, %eax 18154; NoVLX-NEXT: vzeroupper 18155; NoVLX-NEXT: retq 18156entry: 18157 %0 = bitcast <2 x i64> %__a to <2 x i64> 18158 %1 = 
bitcast <2 x i64> %__b to <2 x i64> 18159 %2 = icmp ult <2 x i64> %0, %1 18160 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18161 %4 = bitcast <32 x i1> %3 to i32 18162 ret i32 %4 18163} 18164 18165define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 18166; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: 18167; VLX: # %bb.0: # %entry 18168; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 18169; VLX-NEXT: kmovd %k0, %eax 18170; VLX-NEXT: retq 18171; 18172; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: 18173; NoVLX: # %bb.0: # %entry 18174; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18175; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 18176; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18177; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18178; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18179; NoVLX-NEXT: kmovw %k0, %eax 18180; NoVLX-NEXT: vzeroupper 18181; NoVLX-NEXT: retq 18182entry: 18183 %0 = bitcast <2 x i64> %__a to <2 x i64> 18184 %load = load <2 x i64>, <2 x i64>* %__b 18185 %1 = bitcast <2 x i64> %load to <2 x i64> 18186 %2 = icmp ult <2 x i64> %0, %1 18187 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18188 %4 = bitcast <32 x i1> %3 to i32 18189 ret i32 %4 18190} 18191 18192define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18193; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: 18194; VLX: # %bb.0: # %entry 18195; VLX-NEXT: kmovd %edi, %k1 18196; VLX-NEXT: vpcmpltuq 
%xmm1, %xmm0, %k0 {%k1} 18197; VLX-NEXT: kmovd %k0, %eax 18198; VLX-NEXT: retq 18199; 18200; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: 18201; NoVLX: # %bb.0: # %entry 18202; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18203; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18204; NoVLX-NEXT: kmovw %edi, %k1 18205; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18206; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18207; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18208; NoVLX-NEXT: kmovw %k0, %eax 18209; NoVLX-NEXT: vzeroupper 18210; NoVLX-NEXT: retq 18211entry: 18212 %0 = bitcast <2 x i64> %__a to <2 x i64> 18213 %1 = bitcast <2 x i64> %__b to <2 x i64> 18214 %2 = icmp ult <2 x i64> %0, %1 18215 %3 = bitcast i8 %__u to <8 x i1> 18216 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18217 %4 = and <2 x i1> %2, %extract.i 18218 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18219 %6 = bitcast <32 x i1> %5 to i32 18220 ret i32 %6 18221} 18222 18223define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 18224; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: 18225; VLX: # %bb.0: # %entry 18226; VLX-NEXT: kmovd %edi, %k1 18227; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 18228; VLX-NEXT: kmovd %k0, %eax 18229; VLX-NEXT: retq 18230; 18231; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: 18232; NoVLX: # %bb.0: # %entry 18233; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18234; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 18235; NoVLX-NEXT: kmovw %edi, %k1 18236; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18237; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18238; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18239; NoVLX-NEXT: 
kmovw %k0, %eax 18240; NoVLX-NEXT: vzeroupper 18241; NoVLX-NEXT: retq 18242entry: 18243 %0 = bitcast <2 x i64> %__a to <2 x i64> 18244 %load = load <2 x i64>, <2 x i64>* %__b 18245 %1 = bitcast <2 x i64> %load to <2 x i64> 18246 %2 = icmp ult <2 x i64> %0, %1 18247 %3 = bitcast i8 %__u to <8 x i1> 18248 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18249 %4 = and <2 x i1> %2, %extract.i 18250 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18251 %6 = bitcast <32 x i1> %5 to i32 18252 ret i32 %6 18253} 18254 18255 18256define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 18257; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: 18258; VLX: # %bb.0: # %entry 18259; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 18260; VLX-NEXT: kmovd %k0, %eax 18261; VLX-NEXT: retq 18262; 18263; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: 18264; NoVLX: # %bb.0: # %entry 18265; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18266; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 18267; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18268; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18269; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18270; NoVLX-NEXT: kmovw %k0, %eax 18271; NoVLX-NEXT: vzeroupper 18272; NoVLX-NEXT: retq 18273entry: 18274 %0 = bitcast <2 x i64> %__a to <2 x i64> 18275 %load = load i64, i64* %__b 18276 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18277 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18278 %2 = icmp ult <2 x i64> %0, %1 18279 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 
3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18280 %4 = bitcast <32 x i1> %3 to i32 18281 ret i32 %4 18282} 18283 18284define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 18285; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: 18286; VLX: # %bb.0: # %entry 18287; VLX-NEXT: kmovd %edi, %k1 18288; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 18289; VLX-NEXT: kmovd %k0, %eax 18290; VLX-NEXT: retq 18291; 18292; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: 18293; NoVLX: # %bb.0: # %entry 18294; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18295; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 18296; NoVLX-NEXT: kmovw %edi, %k1 18297; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18298; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18299; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18300; NoVLX-NEXT: kmovw %k0, %eax 18301; NoVLX-NEXT: vzeroupper 18302; NoVLX-NEXT: retq 18303entry: 18304 %0 = bitcast <2 x i64> %__a to <2 x i64> 18305 %load = load i64, i64* %__b 18306 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18307 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18308 %2 = icmp ult <2 x i64> %0, %1 18309 %3 = bitcast i8 %__u to <8 x i1> 18310 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18311 %4 = and <2 x i1> %extract.i, %2 18312 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18313 %6 = bitcast <32 x i1> %5 to i32 18314 ret i32 %6 18315} 18316 18317 18318define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18319; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: 18320; VLX: 
# %bb.0: # %entry 18321; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 18322; VLX-NEXT: kmovq %k0, %rax 18323; VLX-NEXT: retq 18324; 18325; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: 18326; NoVLX: # %bb.0: # %entry 18327; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18328; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18329; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18330; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18331; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18332; NoVLX-NEXT: kmovw %k0, %eax 18333; NoVLX-NEXT: movzwl %ax, %eax 18334; NoVLX-NEXT: vzeroupper 18335; NoVLX-NEXT: retq 18336entry: 18337 %0 = bitcast <2 x i64> %__a to <2 x i64> 18338 %1 = bitcast <2 x i64> %__b to <2 x i64> 18339 %2 = icmp ult <2 x i64> %0, %1 18340 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18341 %4 = bitcast <64 x i1> %3 to i64 18342 ret i64 %4 18343} 18344 18345define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 18346; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: 18347; VLX: # %bb.0: # %entry 18348; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 18349; VLX-NEXT: kmovq %k0, %rax 18350; VLX-NEXT: retq 18351; 18352; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: 18353; NoVLX: # %bb.0: # %entry 18354; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18355; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 18356; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18357; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18358; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18359; NoVLX-NEXT: kmovw %k0, %eax 18360; 
NoVLX-NEXT: movzwl %ax, %eax 18361; NoVLX-NEXT: vzeroupper 18362; NoVLX-NEXT: retq 18363entry: 18364 %0 = bitcast <2 x i64> %__a to <2 x i64> 18365 %load = load <2 x i64>, <2 x i64>* %__b 18366 %1 = bitcast <2 x i64> %load to <2 x i64> 18367 %2 = icmp ult <2 x i64> %0, %1 18368 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18369 %4 = bitcast <64 x i1> %3 to i64 18370 ret i64 %4 18371} 18372 18373define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18374; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: 18375; VLX: # %bb.0: # %entry 18376; VLX-NEXT: kmovd %edi, %k1 18377; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 18378; VLX-NEXT: kmovq %k0, %rax 18379; VLX-NEXT: retq 18380; 18381; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: 18382; NoVLX: # %bb.0: # %entry 18383; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18384; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18385; NoVLX-NEXT: kmovw %edi, %k1 18386; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18387; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18388; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18389; NoVLX-NEXT: kmovw %k0, %eax 18390; NoVLX-NEXT: movzwl %ax, %eax 18391; NoVLX-NEXT: vzeroupper 18392; NoVLX-NEXT: retq 18393entry: 18394 %0 = bitcast <2 x i64> %__a to <2 x i64> 18395 %1 = bitcast <2 x i64> %__b to <2 x i64> 18396 %2 = icmp ult <2 x i64> %0, %1 18397 %3 = bitcast i8 %__u to <8 x i1> 18398 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 
0, i32 1> 18399 %4 = and <2 x i1> %2, %extract.i 18400 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18401 %6 = bitcast <64 x i1> %5 to i64 18402 ret i64 %6 18403} 18404 18405define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 18406; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: 18407; VLX: # %bb.0: # %entry 18408; VLX-NEXT: kmovd %edi, %k1 18409; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 18410; VLX-NEXT: kmovq %k0, %rax 18411; VLX-NEXT: retq 18412; 18413; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: 18414; NoVLX: # %bb.0: # %entry 18415; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18416; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 18417; NoVLX-NEXT: kmovw %edi, %k1 18418; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18419; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18420; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18421; NoVLX-NEXT: kmovw %k0, %eax 18422; NoVLX-NEXT: movzwl %ax, %eax 18423; NoVLX-NEXT: vzeroupper 18424; NoVLX-NEXT: retq 18425entry: 18426 %0 = bitcast <2 x i64> %__a to <2 x i64> 18427 %load = load <2 x i64>, <2 x i64>* %__b 18428 %1 = bitcast <2 x i64> %load to <2 x i64> 18429 %2 = icmp ult <2 x i64> %0, %1 18430 %3 = bitcast i8 %__u to <8 x i1> 18431 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18432 %4 = and <2 x i1> %2, %extract.i 18433 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18434 %6 = bitcast <64 x i1> %5 to i64 18435 ret i64 %6 18436} 18437 18438 18439define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { 18440; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: 18441; VLX: # %bb.0: # %entry 18442; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 18443; VLX-NEXT: kmovq %k0, %rax 18444; VLX-NEXT: retq 18445; 18446; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: 18447; NoVLX: # %bb.0: # %entry 18448; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18449; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 18450; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18451; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18452; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18453; NoVLX-NEXT: kmovw %k0, %eax 18454; NoVLX-NEXT: movzwl %ax, %eax 18455; NoVLX-NEXT: vzeroupper 18456; NoVLX-NEXT: retq 18457entry: 18458 %0 = bitcast <2 x i64> %__a to <2 x i64> 18459 %load = load i64, i64* %__b 18460 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18461 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18462 %2 = icmp ult <2 x i64> %0, %1 18463 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3> 18464 %4 = bitcast <64 x i1> %3 to i64 18465 ret i64 %4 18466} 18467 18468define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { 18469; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: 18470; VLX: # %bb.0: # %entry 18471; VLX-NEXT: kmovd %edi, %k1 18472; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 18473; VLX-NEXT: kmovq %k0, %rax 18474; VLX-NEXT: retq 18475; 18476; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: 18477; NoVLX: # %bb.0: # %entry 18478; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18479; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 18480; NoVLX-NEXT: kmovw %edi, %k1 18481; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18482; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18483; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18484; NoVLX-NEXT: kmovw %k0, %eax 18485; NoVLX-NEXT: movzwl %ax, %eax 18486; NoVLX-NEXT: vzeroupper 18487; NoVLX-NEXT: retq 18488entry: 18489 %0 = bitcast <2 x i64> %__a to <2 x i64> 18490 %load = load i64, i64* %__b 18491 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18492 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18493 %2 = icmp ult <2 x i64> %0, %1 18494 %3 = bitcast i8 %__u to <8 x i1> 18495 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18496 %4 = and <2 x i1> %extract.i, %2 18497 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18498 %6 = bitcast <64 x i1> %5 to i64 18499 ret i64 %6 
18500} 18501 18502 18503define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18504; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: 18505; VLX: # %bb.0: # %entry 18506; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18507; VLX-NEXT: kmovd %k0, %eax 18508; VLX-NEXT: # kill: def $al killed $al killed $eax 18509; VLX-NEXT: vzeroupper 18510; VLX-NEXT: retq 18511; 18512; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: 18513; NoVLX: # %bb.0: # %entry 18514; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18515; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18516; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18517; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18518; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18519; NoVLX-NEXT: kmovw %k0, %eax 18520; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18521; NoVLX-NEXT: vzeroupper 18522; NoVLX-NEXT: retq 18523entry: 18524 %0 = bitcast <4 x i64> %__a to <4 x i64> 18525 %1 = bitcast <4 x i64> %__b to <4 x i64> 18526 %2 = icmp ult <4 x i64> %0, %1 18527 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18528 %4 = bitcast <8 x i1> %3 to i8 18529 ret i8 %4 18530} 18531 18532define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18533; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: 18534; VLX: # %bb.0: # %entry 18535; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18536; VLX-NEXT: kmovd %k0, %eax 18537; VLX-NEXT: # kill: def $al killed $al killed $eax 18538; VLX-NEXT: vzeroupper 18539; VLX-NEXT: retq 18540; 18541; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: 18542; NoVLX: # %bb.0: # %entry 18543; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18544; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18545; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18546; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18547; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18548; NoVLX-NEXT: kmovw %k0, %eax 18549; NoVLX-NEXT: # kill: def $al killed 
$al killed $eax 18550; NoVLX-NEXT: vzeroupper 18551; NoVLX-NEXT: retq 18552entry: 18553 %0 = bitcast <4 x i64> %__a to <4 x i64> 18554 %load = load <4 x i64>, <4 x i64>* %__b 18555 %1 = bitcast <4 x i64> %load to <4 x i64> 18556 %2 = icmp ult <4 x i64> %0, %1 18557 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18558 %4 = bitcast <8 x i1> %3 to i8 18559 ret i8 %4 18560} 18561 18562define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18563; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: 18564; VLX: # %bb.0: # %entry 18565; VLX-NEXT: kmovd %edi, %k1 18566; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18567; VLX-NEXT: kmovd %k0, %eax 18568; VLX-NEXT: # kill: def $al killed $al killed $eax 18569; VLX-NEXT: vzeroupper 18570; VLX-NEXT: retq 18571; 18572; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: 18573; NoVLX: # %bb.0: # %entry 18574; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18575; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18576; NoVLX-NEXT: kmovw %edi, %k1 18577; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18578; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18579; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18580; NoVLX-NEXT: kmovw %k0, %eax 18581; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18582; NoVLX-NEXT: vzeroupper 18583; NoVLX-NEXT: retq 18584entry: 18585 %0 = bitcast <4 x i64> %__a to <4 x i64> 18586 %1 = bitcast <4 x i64> %__b to <4 x i64> 18587 %2 = icmp ult <4 x i64> %0, %1 18588 %3 = bitcast i8 %__u to <8 x i1> 18589 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18590 %4 = and <4 x i1> %2, %extract.i 18591 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18592 %6 = bitcast <8 x i1> %5 to i8 18593 ret i8 %6 18594} 18595 18596define zeroext i8 
@test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18597; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: 18598; VLX: # %bb.0: # %entry 18599; VLX-NEXT: kmovd %edi, %k1 18600; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18601; VLX-NEXT: kmovd %k0, %eax 18602; VLX-NEXT: # kill: def $al killed $al killed $eax 18603; VLX-NEXT: vzeroupper 18604; VLX-NEXT: retq 18605; 18606; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: 18607; NoVLX: # %bb.0: # %entry 18608; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18609; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18610; NoVLX-NEXT: kmovw %edi, %k1 18611; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18612; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18613; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18614; NoVLX-NEXT: kmovw %k0, %eax 18615; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18616; NoVLX-NEXT: vzeroupper 18617; NoVLX-NEXT: retq 18618entry: 18619 %0 = bitcast <4 x i64> %__a to <4 x i64> 18620 %load = load <4 x i64>, <4 x i64>* %__b 18621 %1 = bitcast <4 x i64> %load to <4 x i64> 18622 %2 = icmp ult <4 x i64> %0, %1 18623 %3 = bitcast i8 %__u to <8 x i1> 18624 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18625 %4 = and <4 x i1> %2, %extract.i 18626 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18627 %6 = bitcast <8 x i1> %5 to i8 18628 ret i8 %6 18629} 18630 18631 18632define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 18633; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: 18634; VLX: # %bb.0: # %entry 18635; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 18636; VLX-NEXT: kmovd %k0, %eax 18637; VLX-NEXT: # kill: def $al killed $al killed $eax 18638; VLX-NEXT: vzeroupper 18639; VLX-NEXT: retq 18640; 18641; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: 18642; NoVLX: # %bb.0: # %entry 
18643; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18644; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 18645; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18646; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18647; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18648; NoVLX-NEXT: kmovw %k0, %eax 18649; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18650; NoVLX-NEXT: vzeroupper 18651; NoVLX-NEXT: retq 18652entry: 18653 %0 = bitcast <4 x i64> %__a to <4 x i64> 18654 %load = load i64, i64* %__b 18655 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18656 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18657 %2 = icmp ult <4 x i64> %0, %1 18658 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18659 %4 = bitcast <8 x i1> %3 to i8 18660 ret i8 %4 18661} 18662 18663define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 18664; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: 18665; VLX: # %bb.0: # %entry 18666; VLX-NEXT: kmovd %edi, %k1 18667; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 18668; VLX-NEXT: kmovd %k0, %eax 18669; VLX-NEXT: # kill: def $al killed $al killed $eax 18670; VLX-NEXT: vzeroupper 18671; VLX-NEXT: retq 18672; 18673; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: 18674; NoVLX: # %bb.0: # %entry 18675; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18676; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 18677; NoVLX-NEXT: kmovw %edi, %k1 18678; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18679; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18680; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18681; NoVLX-NEXT: kmovw %k0, %eax 18682; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18683; NoVLX-NEXT: vzeroupper 18684; NoVLX-NEXT: retq 18685entry: 18686 %0 = bitcast <4 x i64> %__a to <4 x i64> 18687 %load = load i64, i64* %__b 18688 %vec = insertelement <4 x i64> undef, i64 %load, 
i32 0 18689 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18690 %2 = icmp ult <4 x i64> %0, %1 18691 %3 = bitcast i8 %__u to <8 x i1> 18692 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18693 %4 = and <4 x i1> %extract.i, %2 18694 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18695 %6 = bitcast <8 x i1> %5 to i8 18696 ret i8 %6 18697} 18698 18699 18700define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18701; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: 18702; VLX: # %bb.0: # %entry 18703; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18704; VLX-NEXT: kmovd %k0, %eax 18705; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18706; VLX-NEXT: vzeroupper 18707; VLX-NEXT: retq 18708; 18709; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: 18710; NoVLX: # %bb.0: # %entry 18711; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18712; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18713; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18714; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18715; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18716; NoVLX-NEXT: kmovw %k0, %eax 18717; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18718; NoVLX-NEXT: vzeroupper 18719; NoVLX-NEXT: retq 18720entry: 18721 %0 = bitcast <4 x i64> %__a to <4 x i64> 18722 %1 = bitcast <4 x i64> %__b to <4 x i64> 18723 %2 = icmp ult <4 x i64> %0, %1 18724 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18725 %4 = bitcast <16 x i1> %3 to i16 18726 ret i16 %4 18727} 18728 18729define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18730; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: 18731; VLX: # %bb.0: # %entry 18732; VLX-NEXT: 
vpcmpltuq (%rdi), %ymm0, %k0 18733; VLX-NEXT: kmovd %k0, %eax 18734; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18735; VLX-NEXT: vzeroupper 18736; VLX-NEXT: retq 18737; 18738; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: 18739; NoVLX: # %bb.0: # %entry 18740; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18741; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18742; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18743; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18744; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18745; NoVLX-NEXT: kmovw %k0, %eax 18746; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18747; NoVLX-NEXT: vzeroupper 18748; NoVLX-NEXT: retq 18749entry: 18750 %0 = bitcast <4 x i64> %__a to <4 x i64> 18751 %load = load <4 x i64>, <4 x i64>* %__b 18752 %1 = bitcast <4 x i64> %load to <4 x i64> 18753 %2 = icmp ult <4 x i64> %0, %1 18754 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18755 %4 = bitcast <16 x i1> %3 to i16 18756 ret i16 %4 18757} 18758 18759define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18760; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: 18761; VLX: # %bb.0: # %entry 18762; VLX-NEXT: kmovd %edi, %k1 18763; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18764; VLX-NEXT: kmovd %k0, %eax 18765; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18766; VLX-NEXT: vzeroupper 18767; VLX-NEXT: retq 18768; 18769; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: 18770; NoVLX: # %bb.0: # %entry 18771; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18772; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18773; NoVLX-NEXT: kmovw %edi, %k1 18774; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18775; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18776; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18777; NoVLX-NEXT: kmovw %k0, %eax 18778; NoVLX-NEXT: # kill: def $ax killed 
$ax killed $eax 18779; NoVLX-NEXT: vzeroupper 18780; NoVLX-NEXT: retq 18781entry: 18782 %0 = bitcast <4 x i64> %__a to <4 x i64> 18783 %1 = bitcast <4 x i64> %__b to <4 x i64> 18784 %2 = icmp ult <4 x i64> %0, %1 18785 %3 = bitcast i8 %__u to <8 x i1> 18786 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18787 %4 = and <4 x i1> %2, %extract.i 18788 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18789 %6 = bitcast <16 x i1> %5 to i16 18790 ret i16 %6 18791} 18792 18793define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18794; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: 18795; VLX: # %bb.0: # %entry 18796; VLX-NEXT: kmovd %edi, %k1 18797; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18798; VLX-NEXT: kmovd %k0, %eax 18799; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18800; VLX-NEXT: vzeroupper 18801; VLX-NEXT: retq 18802; 18803; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: 18804; NoVLX: # %bb.0: # %entry 18805; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18806; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18807; NoVLX-NEXT: kmovw %edi, %k1 18808; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18809; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18810; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18811; NoVLX-NEXT: kmovw %k0, %eax 18812; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18813; NoVLX-NEXT: vzeroupper 18814; NoVLX-NEXT: retq 18815entry: 18816 %0 = bitcast <4 x i64> %__a to <4 x i64> 18817 %load = load <4 x i64>, <4 x i64>* %__b 18818 %1 = bitcast <4 x i64> %load to <4 x i64> 18819 %2 = icmp ult <4 x i64> %0, %1 18820 %3 = bitcast i8 %__u to <8 x i1> 18821 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18822 %4 = and <4 x i1> %2, %extract.i 18823 
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18824 %6 = bitcast <16 x i1> %5 to i16 18825 ret i16 %6 18826} 18827 18828 18829define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 18830; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: 18831; VLX: # %bb.0: # %entry 18832; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 18833; VLX-NEXT: kmovd %k0, %eax 18834; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18835; VLX-NEXT: vzeroupper 18836; VLX-NEXT: retq 18837; 18838; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: 18839; NoVLX: # %bb.0: # %entry 18840; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18841; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 18842; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18843; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18844; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18845; NoVLX-NEXT: kmovw %k0, %eax 18846; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18847; NoVLX-NEXT: vzeroupper 18848; NoVLX-NEXT: retq 18849entry: 18850 %0 = bitcast <4 x i64> %__a to <4 x i64> 18851 %load = load i64, i64* %__b 18852 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18853 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18854 %2 = icmp ult <4 x i64> %0, %1 18855 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18856 %4 = bitcast <16 x i1> %3 to i16 18857 ret i16 %4 18858} 18859 18860define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 18861; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: 18862; VLX: # %bb.0: # %entry 18863; VLX-NEXT: kmovd %edi, %k1 18864; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 18865; 
VLX-NEXT: kmovd %k0, %eax 18866; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18867; VLX-NEXT: vzeroupper 18868; VLX-NEXT: retq 18869; 18870; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: 18871; NoVLX: # %bb.0: # %entry 18872; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18873; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 18874; NoVLX-NEXT: kmovw %edi, %k1 18875; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18876; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18877; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18878; NoVLX-NEXT: kmovw %k0, %eax 18879; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18880; NoVLX-NEXT: vzeroupper 18881; NoVLX-NEXT: retq 18882entry: 18883 %0 = bitcast <4 x i64> %__a to <4 x i64> 18884 %load = load i64, i64* %__b 18885 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18886 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18887 %2 = icmp ult <4 x i64> %0, %1 18888 %3 = bitcast i8 %__u to <8 x i1> 18889 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18890 %4 = and <4 x i1> %extract.i, %2 18891 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18892 %6 = bitcast <16 x i1> %5 to i16 18893 ret i16 %6 18894} 18895 18896 18897define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18898; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: 18899; VLX: # %bb.0: # %entry 18900; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18901; VLX-NEXT: kmovd %k0, %eax 18902; VLX-NEXT: vzeroupper 18903; VLX-NEXT: retq 18904; 18905; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: 18906; NoVLX: # %bb.0: # %entry 18907; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18908; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18909; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18910; NoVLX-NEXT: kshiftlw $12, 
%k0, %k0 18911; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18912; NoVLX-NEXT: kmovw %k0, %eax 18913; NoVLX-NEXT: vzeroupper 18914; NoVLX-NEXT: retq 18915entry: 18916 %0 = bitcast <4 x i64> %__a to <4 x i64> 18917 %1 = bitcast <4 x i64> %__b to <4 x i64> 18918 %2 = icmp ult <4 x i64> %0, %1 18919 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18920 %4 = bitcast <32 x i1> %3 to i32 18921 ret i32 %4 18922} 18923 18924define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18925; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: 18926; VLX: # %bb.0: # %entry 18927; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18928; VLX-NEXT: kmovd %k0, %eax 18929; VLX-NEXT: vzeroupper 18930; VLX-NEXT: retq 18931; 18932; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: 18933; NoVLX: # %bb.0: # %entry 18934; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18935; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18936; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18937; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18938; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18939; NoVLX-NEXT: kmovw %k0, %eax 18940; NoVLX-NEXT: vzeroupper 18941; NoVLX-NEXT: retq 18942entry: 18943 %0 = bitcast <4 x i64> %__a to <4 x i64> 18944 %load = load <4 x i64>, <4 x i64>* %__b 18945 %1 = bitcast <4 x i64> %load to <4 x i64> 18946 %2 = icmp ult <4 x i64> %0, %1 18947 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18948 %4 = bitcast <32 x i1> %3 to i32 18949 ret i32 %4 18950} 18951 18952define zeroext i32 
@test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18953; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: 18954; VLX: # %bb.0: # %entry 18955; VLX-NEXT: kmovd %edi, %k1 18956; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18957; VLX-NEXT: kmovd %k0, %eax 18958; VLX-NEXT: vzeroupper 18959; VLX-NEXT: retq 18960; 18961; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: 18962; NoVLX: # %bb.0: # %entry 18963; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18964; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18965; NoVLX-NEXT: kmovw %edi, %k1 18966; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18967; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18968; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18969; NoVLX-NEXT: kmovw %k0, %eax 18970; NoVLX-NEXT: vzeroupper 18971; NoVLX-NEXT: retq 18972entry: 18973 %0 = bitcast <4 x i64> %__a to <4 x i64> 18974 %1 = bitcast <4 x i64> %__b to <4 x i64> 18975 %2 = icmp ult <4 x i64> %0, %1 18976 %3 = bitcast i8 %__u to <8 x i1> 18977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18978 %4 = and <4 x i1> %2, %extract.i 18979 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18980 %6 = bitcast <32 x i1> %5 to i32 18981 ret i32 %6 18982} 18983 18984define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 18985; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: 18986; VLX: # %bb.0: # %entry 18987; VLX-NEXT: kmovd %edi, %k1 18988; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18989; VLX-NEXT: kmovd %k0, %eax 18990; VLX-NEXT: vzeroupper 18991; VLX-NEXT: retq 18992; 18993; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: 
18994; NoVLX: # %bb.0: # %entry 18995; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18996; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18997; NoVLX-NEXT: kmovw %edi, %k1 18998; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18999; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19000; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19001; NoVLX-NEXT: kmovw %k0, %eax 19002; NoVLX-NEXT: vzeroupper 19003; NoVLX-NEXT: retq 19004entry: 19005 %0 = bitcast <4 x i64> %__a to <4 x i64> 19006 %load = load <4 x i64>, <4 x i64>* %__b 19007 %1 = bitcast <4 x i64> %load to <4 x i64> 19008 %2 = icmp ult <4 x i64> %0, %1 19009 %3 = bitcast i8 %__u to <8 x i1> 19010 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 19011 %4 = and <4 x i1> %2, %extract.i 19012 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19013 %6 = bitcast <32 x i1> %5 to i32 19014 ret i32 %6 19015} 19016 19017 19018define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 19019; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: 19020; VLX: # %bb.0: # %entry 19021; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 19022; VLX-NEXT: kmovd %k0, %eax 19023; VLX-NEXT: vzeroupper 19024; VLX-NEXT: retq 19025; 19026; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: 19027; NoVLX: # %bb.0: # %entry 19028; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19029; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 19030; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19031; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19032; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19033; NoVLX-NEXT: kmovw %k0, %eax 19034; NoVLX-NEXT: vzeroupper 19035; NoVLX-NEXT: retq 19036entry: 19037 %0 = bitcast <4 x i64> %__a to <4 x i64> 19038 %load = load i64, i64* %__b 19039 %vec = 
insertelement <4 x i64> undef, i64 %load, i32 0 19040 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19041 %2 = icmp ult <4 x i64> %0, %1 19042 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19043 %4 = bitcast <32 x i1> %3 to i32 19044 ret i32 %4 19045} 19046 19047define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 19048; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: 19049; VLX: # %bb.0: # %entry 19050; VLX-NEXT: kmovd %edi, %k1 19051; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 19052; VLX-NEXT: kmovd %k0, %eax 19053; VLX-NEXT: vzeroupper 19054; VLX-NEXT: retq 19055; 19056; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: 19057; NoVLX: # %bb.0: # %entry 19058; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19059; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 19060; NoVLX-NEXT: kmovw %edi, %k1 19061; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19062; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19063; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19064; NoVLX-NEXT: kmovw %k0, %eax 19065; NoVLX-NEXT: vzeroupper 19066; NoVLX-NEXT: retq 19067entry: 19068 %0 = bitcast <4 x i64> %__a to <4 x i64> 19069 %load = load i64, i64* %__b 19070 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 19071 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19072 %2 = icmp ult <4 x i64> %0, %1 19073 %3 = bitcast i8 %__u to <8 x i1> 19074 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 19075 %4 = and <4 x i1> %extract.i, %2 19076 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 
4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19077 %6 = bitcast <32 x i1> %5 to i32 19078 ret i32 %6 19079} 19080 19081 19082define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 19083; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: 19084; VLX: # %bb.0: # %entry 19085; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 19086; VLX-NEXT: kmovq %k0, %rax 19087; VLX-NEXT: vzeroupper 19088; VLX-NEXT: retq 19089; 19090; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: 19091; NoVLX: # %bb.0: # %entry 19092; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 19093; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19094; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19095; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19096; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19097; NoVLX-NEXT: kmovw %k0, %eax 19098; NoVLX-NEXT: movzwl %ax, %eax 19099; NoVLX-NEXT: vzeroupper 19100; NoVLX-NEXT: retq 19101entry: 19102 %0 = bitcast <4 x i64> %__a to <4 x i64> 19103 %1 = bitcast <4 x i64> %__b to <4 x i64> 19104 %2 = icmp ult <4 x i64> %0, %1 19105 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19106 %4 = bitcast <64 x i1> %3 to i64 19107 ret i64 %4 19108} 19109 19110define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 19111; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: 19112; VLX: # %bb.0: # %entry 19113; 
VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 19114; VLX-NEXT: kmovq %k0, %rax 19115; VLX-NEXT: vzeroupper 19116; VLX-NEXT: retq 19117; 19118; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: 19119; NoVLX: # %bb.0: # %entry 19120; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19121; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 19122; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19123; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19124; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19125; NoVLX-NEXT: kmovw %k0, %eax 19126; NoVLX-NEXT: movzwl %ax, %eax 19127; NoVLX-NEXT: vzeroupper 19128; NoVLX-NEXT: retq 19129entry: 19130 %0 = bitcast <4 x i64> %__a to <4 x i64> 19131 %load = load <4 x i64>, <4 x i64>* %__b 19132 %1 = bitcast <4 x i64> %load to <4 x i64> 19133 %2 = icmp ult <4 x i64> %0, %1 19134 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19135 %4 = bitcast <64 x i1> %3 to i64 19136 ret i64 %4 19137} 19138 19139define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 19140; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: 19141; VLX: # %bb.0: # %entry 19142; VLX-NEXT: kmovd %edi, %k1 19143; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 19144; VLX-NEXT: kmovq %k0, %rax 19145; VLX-NEXT: vzeroupper 19146; VLX-NEXT: retq 19147; 19148; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: 19149; NoVLX: # %bb.0: # %entry 19150; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 19151; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19152; NoVLX-NEXT: kmovw %edi, %k1 
19153; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19154; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19155; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19156; NoVLX-NEXT: kmovw %k0, %eax 19157; NoVLX-NEXT: movzwl %ax, %eax 19158; NoVLX-NEXT: vzeroupper 19159; NoVLX-NEXT: retq 19160entry: 19161 %0 = bitcast <4 x i64> %__a to <4 x i64> 19162 %1 = bitcast <4 x i64> %__b to <4 x i64> 19163 %2 = icmp ult <4 x i64> %0, %1 19164 %3 = bitcast i8 %__u to <8 x i1> 19165 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 19166 %4 = and <4 x i1> %2, %extract.i 19167 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19168 %6 = bitcast <64 x i1> %5 to i64 19169 ret i64 %6 19170} 19171 19172define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 19173; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: 19174; VLX: # %bb.0: # %entry 19175; VLX-NEXT: kmovd %edi, %k1 19176; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 19177; VLX-NEXT: kmovq %k0, %rax 19178; VLX-NEXT: vzeroupper 19179; VLX-NEXT: retq 19180; 19181; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: 19182; NoVLX: # %bb.0: # %entry 19183; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19184; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 19185; NoVLX-NEXT: kmovw %edi, %k1 19186; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19187; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19188; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19189; NoVLX-NEXT: kmovw %k0, %eax 19190; 
NoVLX-NEXT: movzwl %ax, %eax 19191; NoVLX-NEXT: vzeroupper 19192; NoVLX-NEXT: retq 19193entry: 19194 %0 = bitcast <4 x i64> %__a to <4 x i64> 19195 %load = load <4 x i64>, <4 x i64>* %__b 19196 %1 = bitcast <4 x i64> %load to <4 x i64> 19197 %2 = icmp ult <4 x i64> %0, %1 19198 %3 = bitcast i8 %__u to <8 x i1> 19199 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 19200 %4 = and <4 x i1> %2, %extract.i 19201 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19202 %6 = bitcast <64 x i1> %5 to i64 19203 ret i64 %6 19204} 19205 19206 19207define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { 19208; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: 19209; VLX: # %bb.0: # %entry 19210; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 19211; VLX-NEXT: kmovq %k0, %rax 19212; VLX-NEXT: vzeroupper 19213; VLX-NEXT: retq 19214; 19215; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: 19216; NoVLX: # %bb.0: # %entry 19217; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19218; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 19219; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19220; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19221; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19222; NoVLX-NEXT: kmovw %k0, %eax 19223; NoVLX-NEXT: movzwl %ax, %eax 19224; NoVLX-NEXT: vzeroupper 19225; NoVLX-NEXT: retq 19226entry: 19227 %0 = bitcast <4 x i64> %__a to <4 x i64> 19228 %load = load i64, i64* %__b 19229 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 
19230 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19231 %2 = icmp ult <4 x i64> %0, %1 19232 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19233 %4 = bitcast <64 x i1> %3 to i64 19234 ret i64 %4 19235} 19236 19237define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { 19238; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: 19239; VLX: # %bb.0: # %entry 19240; VLX-NEXT: kmovd %edi, %k1 19241; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 19242; VLX-NEXT: kmovq %k0, %rax 19243; VLX-NEXT: vzeroupper 19244; VLX-NEXT: retq 19245; 19246; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: 19247; NoVLX: # %bb.0: # %entry 19248; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 19249; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 19250; NoVLX-NEXT: kmovw %edi, %k1 19251; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19252; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19253; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19254; NoVLX-NEXT: kmovw %k0, %eax 19255; NoVLX-NEXT: movzwl %ax, %eax 19256; NoVLX-NEXT: vzeroupper 19257; NoVLX-NEXT: retq 19258entry: 19259 %0 = bitcast <4 x i64> %__a to <4 x i64> 19260 %load = load i64, i64* %__b 19261 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 19262 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19263 %2 = icmp ult <4 x i64> %0, %1 19264 %3 = bitcast i8 %__u to <8 x i1> 19265 %extract.i = shufflevector <8 x 
i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 19266 %4 = and <4 x i1> %extract.i, %2 19267 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19268 %6 = bitcast <64 x i1> %5 to i64 19269 ret i64 %6 19270} 19271 19272 19273define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19274; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask: 19275; VLX: # %bb.0: # %entry 19276; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19277; VLX-NEXT: kmovd %k0, %eax 19278; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19279; VLX-NEXT: vzeroupper 19280; VLX-NEXT: retq 19281; 19282; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask: 19283; NoVLX: # %bb.0: # %entry 19284; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19285; NoVLX-NEXT: kmovw %k0, %eax 19286; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19287; NoVLX-NEXT: vzeroupper 19288; NoVLX-NEXT: retq 19289entry: 19290 %0 = bitcast <8 x i64> %__a to <8 x i64> 19291 %1 = bitcast <8 x i64> %__b to <8 x i64> 19292 %2 = icmp ult <8 x i64> %0, %1 19293 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19294 %4 = bitcast <16 x i1> %3 to i16 19295 ret i16 %4 19296} 19297 19298define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19299; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem: 19300; VLX: # %bb.0: # %entry 19301; VLX-NEXT: vpcmpltuq (%rdi), 
%zmm0, %k0 19302; VLX-NEXT: kmovd %k0, %eax 19303; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19304; VLX-NEXT: vzeroupper 19305; VLX-NEXT: retq 19306; 19307; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem: 19308; NoVLX: # %bb.0: # %entry 19309; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 19310; NoVLX-NEXT: kmovw %k0, %eax 19311; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19312; NoVLX-NEXT: vzeroupper 19313; NoVLX-NEXT: retq 19314entry: 19315 %0 = bitcast <8 x i64> %__a to <8 x i64> 19316 %load = load <8 x i64>, <8 x i64>* %__b 19317 %1 = bitcast <8 x i64> %load to <8 x i64> 19318 %2 = icmp ult <8 x i64> %0, %1 19319 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19320 %4 = bitcast <16 x i1> %3 to i16 19321 ret i16 %4 19322} 19323 19324define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19325; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: 19326; VLX: # %bb.0: # %entry 19327; VLX-NEXT: kmovd %edi, %k1 19328; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19329; VLX-NEXT: kmovd %k0, %eax 19330; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19331; VLX-NEXT: vzeroupper 19332; VLX-NEXT: retq 19333; 19334; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: 19335; NoVLX: # %bb.0: # %entry 19336; NoVLX-NEXT: kmovw %edi, %k1 19337; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19338; NoVLX-NEXT: kmovw %k0, %eax 19339; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19340; NoVLX-NEXT: vzeroupper 19341; NoVLX-NEXT: retq 19342entry: 19343 %0 = bitcast <8 x i64> %__a to <8 x i64> 19344 %1 = bitcast <8 x i64> %__b to <8 x i64> 19345 %2 = icmp ult <8 x i64> %0, %1 19346 %3 = bitcast i8 %__u to <8 x i1> 19347 %4 = and <8 x i1> %2, %3 19348 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, 
i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19349 %6 = bitcast <16 x i1> %5 to i16 19350 ret i16 %6 19351} 19352 19353define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19354; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem: 19355; VLX: # %bb.0: # %entry 19356; VLX-NEXT: kmovd %edi, %k1 19357; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19358; VLX-NEXT: kmovd %k0, %eax 19359; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19360; VLX-NEXT: vzeroupper 19361; VLX-NEXT: retq 19362; 19363; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem: 19364; NoVLX: # %bb.0: # %entry 19365; NoVLX-NEXT: kmovw %edi, %k1 19366; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19367; NoVLX-NEXT: kmovw %k0, %eax 19368; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19369; NoVLX-NEXT: vzeroupper 19370; NoVLX-NEXT: retq 19371entry: 19372 %0 = bitcast <8 x i64> %__a to <8 x i64> 19373 %load = load <8 x i64>, <8 x i64>* %__b 19374 %1 = bitcast <8 x i64> %load to <8 x i64> 19375 %2 = icmp ult <8 x i64> %0, %1 19376 %3 = bitcast i8 %__u to <8 x i1> 19377 %4 = and <8 x i1> %2, %3 19378 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19379 %6 = bitcast <16 x i1> %5 to i16 19380 ret i16 %6 19381} 19382 19383 19384define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 19385; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: 19386; VLX: # %bb.0: # %entry 19387; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 19388; VLX-NEXT: kmovd %k0, %eax 19389; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19390; VLX-NEXT: vzeroupper 19391; VLX-NEXT: retq 19392; 19393; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: 19394; NoVLX: # %bb.0: # %entry 19395; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, 
%k0 19396; NoVLX-NEXT: kmovw %k0, %eax 19397; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19398; NoVLX-NEXT: vzeroupper 19399; NoVLX-NEXT: retq 19400entry: 19401 %0 = bitcast <8 x i64> %__a to <8 x i64> 19402 %load = load i64, i64* %__b 19403 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19404 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19405 %2 = icmp ult <8 x i64> %0, %1 19406 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19407 %4 = bitcast <16 x i1> %3 to i16 19408 ret i16 %4 19409} 19410 19411define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 19412; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b: 19413; VLX: # %bb.0: # %entry 19414; VLX-NEXT: kmovd %edi, %k1 19415; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19416; VLX-NEXT: kmovd %k0, %eax 19417; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19418; VLX-NEXT: vzeroupper 19419; VLX-NEXT: retq 19420; 19421; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b: 19422; NoVLX: # %bb.0: # %entry 19423; NoVLX-NEXT: kmovw %edi, %k1 19424; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19425; NoVLX-NEXT: kmovw %k0, %eax 19426; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19427; NoVLX-NEXT: vzeroupper 19428; NoVLX-NEXT: retq 19429entry: 19430 %0 = bitcast <8 x i64> %__a to <8 x i64> 19431 %load = load i64, i64* %__b 19432 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19433 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19434 %2 = icmp ult <8 x i64> %0, %1 19435 %3 = bitcast i8 %__u to <8 x i1> 19436 %4 = and <8 x i1> %3, %2 19437 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19438 %6 = bitcast <16 x i1> %5 to i16 19439 ret i16 %6 19440} 19441 19442 19443define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19444; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: 19445; VLX: # %bb.0: # %entry 19446; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19447; VLX-NEXT: kmovd %k0, %eax 19448; VLX-NEXT: vzeroupper 19449; VLX-NEXT: retq 19450; 19451; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: 19452; NoVLX: # %bb.0: # %entry 19453; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19454; NoVLX-NEXT: kmovw %k0, %eax 19455; NoVLX-NEXT: vzeroupper 19456; NoVLX-NEXT: retq 19457entry: 19458 %0 = bitcast <8 x i64> %__a to <8 x i64> 19459 %1 = bitcast <8 x i64> %__b to <8 x i64> 19460 %2 = icmp ult <8 x i64> %0, %1 19461 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19462 %4 = bitcast <32 x i1> %3 to i32 19463 ret i32 %4 19464} 19465 19466define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19467; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: 19468; VLX: # %bb.0: # %entry 19469; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 19470; VLX-NEXT: kmovd %k0, %eax 19471; VLX-NEXT: vzeroupper 19472; VLX-NEXT: retq 19473; 19474; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: 19475; NoVLX: # %bb.0: # %entry 19476; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 19477; NoVLX-NEXT: kmovw %k0, %eax 19478; NoVLX-NEXT: vzeroupper 19479; NoVLX-NEXT: retq 19480entry: 19481 %0 = bitcast <8 x i64> %__a to <8 x i64> 19482 %load = load <8 x i64>, <8 x i64>* %__b 19483 %1 = bitcast <8 x i64> %load to <8 x i64> 19484 %2 = icmp ult <8 x 
i64> %0, %1 19485 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19486 %4 = bitcast <32 x i1> %3 to i32 19487 ret i32 %4 19488} 19489 19490define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19491; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: 19492; VLX: # %bb.0: # %entry 19493; VLX-NEXT: kmovd %edi, %k1 19494; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19495; VLX-NEXT: kmovd %k0, %eax 19496; VLX-NEXT: vzeroupper 19497; VLX-NEXT: retq 19498; 19499; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: 19500; NoVLX: # %bb.0: # %entry 19501; NoVLX-NEXT: kmovw %edi, %k1 19502; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19503; NoVLX-NEXT: kmovw %k0, %eax 19504; NoVLX-NEXT: vzeroupper 19505; NoVLX-NEXT: retq 19506entry: 19507 %0 = bitcast <8 x i64> %__a to <8 x i64> 19508 %1 = bitcast <8 x i64> %__b to <8 x i64> 19509 %2 = icmp ult <8 x i64> %0, %1 19510 %3 = bitcast i8 %__u to <8 x i1> 19511 %4 = and <8 x i1> %2, %3 19512 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19513 %6 = bitcast <32 x i1> %5 to i32 19514 ret i32 %6 19515} 19516 19517define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19518; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: 19519; VLX: # %bb.0: # %entry 19520; VLX-NEXT: kmovd %edi, %k1 19521; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19522; VLX-NEXT: kmovd 
%k0, %eax 19523; VLX-NEXT: vzeroupper 19524; VLX-NEXT: retq 19525; 19526; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: 19527; NoVLX: # %bb.0: # %entry 19528; NoVLX-NEXT: kmovw %edi, %k1 19529; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19530; NoVLX-NEXT: kmovw %k0, %eax 19531; NoVLX-NEXT: vzeroupper 19532; NoVLX-NEXT: retq 19533entry: 19534 %0 = bitcast <8 x i64> %__a to <8 x i64> 19535 %load = load <8 x i64>, <8 x i64>* %__b 19536 %1 = bitcast <8 x i64> %load to <8 x i64> 19537 %2 = icmp ult <8 x i64> %0, %1 19538 %3 = bitcast i8 %__u to <8 x i1> 19539 %4 = and <8 x i1> %2, %3 19540 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19541 %6 = bitcast <32 x i1> %5 to i32 19542 ret i32 %6 19543} 19544 19545 19546define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 19547; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: 19548; VLX: # %bb.0: # %entry 19549; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 19550; VLX-NEXT: kmovd %k0, %eax 19551; VLX-NEXT: vzeroupper 19552; VLX-NEXT: retq 19553; 19554; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: 19555; NoVLX: # %bb.0: # %entry 19556; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 19557; NoVLX-NEXT: kmovw %k0, %eax 19558; NoVLX-NEXT: vzeroupper 19559; NoVLX-NEXT: retq 19560entry: 19561 %0 = bitcast <8 x i64> %__a to <8 x i64> 19562 %load = load i64, i64* %__b 19563 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19564 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19565 %2 = icmp ult <8 x i64> %0, %1 19566 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, 
i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19567 %4 = bitcast <32 x i1> %3 to i32 19568 ret i32 %4 19569} 19570 19571define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 19572; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: 19573; VLX: # %bb.0: # %entry 19574; VLX-NEXT: kmovd %edi, %k1 19575; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19576; VLX-NEXT: kmovd %k0, %eax 19577; VLX-NEXT: vzeroupper 19578; VLX-NEXT: retq 19579; 19580; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: 19581; NoVLX: # %bb.0: # %entry 19582; NoVLX-NEXT: kmovw %edi, %k1 19583; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19584; NoVLX-NEXT: kmovw %k0, %eax 19585; NoVLX-NEXT: vzeroupper 19586; NoVLX-NEXT: retq 19587entry: 19588 %0 = bitcast <8 x i64> %__a to <8 x i64> 19589 %load = load i64, i64* %__b 19590 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19591 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19592 %2 = icmp ult <8 x i64> %0, %1 19593 %3 = bitcast i8 %__u to <8 x i1> 19594 %4 = and <8 x i1> %3, %2 19595 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19596 %6 = bitcast <32 x i1> %5 to i32 19597 ret i32 %6 19598} 19599 19600 19601define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19602; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: 19603; VLX: # %bb.0: # %entry 19604; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19605; VLX-NEXT: kmovq %k0, 
%rax 19606; VLX-NEXT: vzeroupper 19607; VLX-NEXT: retq 19608; 19609; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: 19610; NoVLX: # %bb.0: # %entry 19611; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 19612; NoVLX-NEXT: kmovw %k0, %eax 19613; NoVLX-NEXT: movzwl %ax, %eax 19614; NoVLX-NEXT: vzeroupper 19615; NoVLX-NEXT: retq 19616entry: 19617 %0 = bitcast <8 x i64> %__a to <8 x i64> 19618 %1 = bitcast <8 x i64> %__b to <8 x i64> 19619 %2 = icmp ult <8 x i64> %0, %1 19620 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19621 %4 = bitcast <64 x i1> %3 to i64 19622 ret i64 %4 19623} 19624 19625define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19626; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: 19627; VLX: # %bb.0: # %entry 19628; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 19629; VLX-NEXT: kmovq %k0, %rax 19630; VLX-NEXT: vzeroupper 19631; VLX-NEXT: retq 19632; 19633; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: 19634; NoVLX: # %bb.0: # %entry 19635; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 19636; NoVLX-NEXT: kmovw %k0, %eax 19637; NoVLX-NEXT: movzwl %ax, %eax 19638; NoVLX-NEXT: vzeroupper 19639; NoVLX-NEXT: retq 19640entry: 19641 %0 = bitcast <8 x i64> %__a to <8 x i64> 19642 %load = load <8 x i64>, <8 x i64>* %__b 19643 %1 = bitcast <8 x i64> %load to <8 x i64> 19644 %2 = icmp ult <8 x i64> %0, %1 19645 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, 
i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19646 %4 = bitcast <64 x i1> %3 to i64 19647 ret i64 %4 19648} 19649 19650define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 19651; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: 19652; VLX: # %bb.0: # %entry 19653; VLX-NEXT: kmovd %edi, %k1 19654; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19655; VLX-NEXT: kmovq %k0, %rax 19656; VLX-NEXT: vzeroupper 19657; VLX-NEXT: retq 19658; 19659; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: 19660; NoVLX: # %bb.0: # %entry 19661; NoVLX-NEXT: kmovw %edi, %k1 19662; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 19663; NoVLX-NEXT: kmovw %k0, %eax 19664; NoVLX-NEXT: movzwl %ax, %eax 19665; NoVLX-NEXT: vzeroupper 19666; NoVLX-NEXT: retq 19667entry: 19668 %0 = bitcast <8 x i64> %__a to <8 x i64> 19669 %1 = bitcast <8 x i64> %__b to <8 x i64> 19670 %2 = icmp ult <8 x i64> %0, %1 19671 %3 = bitcast i8 %__u to <8 x i1> 19672 %4 = and <8 x i1> %2, %3 19673 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19674 
%6 = bitcast <64 x i1> %5 to i64 19675 ret i64 %6 19676} 19677 19678define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 19679; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: 19680; VLX: # %bb.0: # %entry 19681; VLX-NEXT: kmovd %edi, %k1 19682; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19683; VLX-NEXT: kmovq %k0, %rax 19684; VLX-NEXT: vzeroupper 19685; VLX-NEXT: retq 19686; 19687; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: 19688; NoVLX: # %bb.0: # %entry 19689; NoVLX-NEXT: kmovw %edi, %k1 19690; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} 19691; NoVLX-NEXT: kmovw %k0, %eax 19692; NoVLX-NEXT: movzwl %ax, %eax 19693; NoVLX-NEXT: vzeroupper 19694; NoVLX-NEXT: retq 19695entry: 19696 %0 = bitcast <8 x i64> %__a to <8 x i64> 19697 %load = load <8 x i64>, <8 x i64>* %__b 19698 %1 = bitcast <8 x i64> %load to <8 x i64> 19699 %2 = icmp ult <8 x i64> %0, %1 19700 %3 = bitcast i8 %__u to <8 x i1> 19701 %4 = and <8 x i1> %2, %3 19702 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19703 %6 = bitcast <64 x i1> %5 to i64 19704 ret i64 %6 19705} 19706 19707 19708define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { 19709; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: 19710; VLX: # %bb.0: # %entry 19711; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 19712; VLX-NEXT: kmovq %k0, %rax 19713; VLX-NEXT: vzeroupper 19714; 
VLX-NEXT: retq 19715; 19716; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: 19717; NoVLX: # %bb.0: # %entry 19718; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 19719; NoVLX-NEXT: kmovw %k0, %eax 19720; NoVLX-NEXT: movzwl %ax, %eax 19721; NoVLX-NEXT: vzeroupper 19722; NoVLX-NEXT: retq 19723entry: 19724 %0 = bitcast <8 x i64> %__a to <8 x i64> 19725 %load = load i64, i64* %__b 19726 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19727 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19728 %2 = icmp ult <8 x i64> %0, %1 19729 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19730 %4 = bitcast <64 x i1> %3 to i64 19731 ret i64 %4 19732} 19733 19734define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { 19735; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: 19736; VLX: # %bb.0: # %entry 19737; VLX-NEXT: kmovd %edi, %k1 19738; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19739; VLX-NEXT: kmovq %k0, %rax 19740; VLX-NEXT: vzeroupper 19741; VLX-NEXT: retq 19742; 19743; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: 19744; NoVLX: # %bb.0: # %entry 19745; NoVLX-NEXT: kmovw %edi, %k1 19746; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 19747; NoVLX-NEXT: kmovw %k0, %eax 19748; NoVLX-NEXT: movzwl %ax, %eax 19749; NoVLX-NEXT: vzeroupper 19750; NoVLX-NEXT: retq 19751entry: 19752 %0 = bitcast 
<8 x i64> %__a to <8 x i64> 19753 %load = load i64, i64* %__b 19754 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 19755 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 19756 %2 = icmp ult <8 x i64> %0, %1 19757 %3 = bitcast i8 %__u to <8 x i1> 19758 %4 = and <8 x i1> %3, %2 19759 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 19760 %6 = bitcast <64 x i1> %5 to i64 19761 ret i64 %6 19762} 19763 19764 19765declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) 19766define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19767; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: 19768; VLX: # %bb.0: # %entry 19769; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 19770; VLX-NEXT: kmovd %k0, %eax 19771; VLX-NEXT: # kill: def $al killed $al killed $eax 19772; VLX-NEXT: retq 19773; 19774; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: 19775; NoVLX: # %bb.0: # %entry 19776; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19777; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19778; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19779; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19780; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19781; NoVLX-NEXT: kmovw %k0, %eax 19782; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19783; NoVLX-NEXT: vzeroupper 19784; NoVLX-NEXT: retq 19785entry: 19786 %0 = bitcast <2 x i64> %__a to <4 x float> 19787 %1 = bitcast <2 x 
i64> %__b to <4 x float> 19788 %2 = fcmp oeq <4 x float> %0, %1 19789 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19790 %4 = bitcast <8 x i1> %3 to i8 19791 ret i8 %4 19792} 19793 19794define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 19795; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: 19796; VLX: # %bb.0: # %entry 19797; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 19798; VLX-NEXT: kmovd %k0, %eax 19799; VLX-NEXT: # kill: def $al killed $al killed $eax 19800; VLX-NEXT: retq 19801; 19802; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: 19803; NoVLX: # %bb.0: # %entry 19804; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19805; NoVLX-NEXT: vmovaps (%rdi), %xmm1 19806; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19807; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19808; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19809; NoVLX-NEXT: kmovw %k0, %eax 19810; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19811; NoVLX-NEXT: vzeroupper 19812; NoVLX-NEXT: retq 19813entry: 19814 %0 = bitcast <2 x i64> %__a to <4 x float> 19815 %load = load <2 x i64>, <2 x i64>* %__b 19816 %1 = bitcast <2 x i64> %load to <4 x float> 19817 %2 = fcmp oeq <4 x float> %0, %1 19818 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19819 %4 = bitcast <8 x i1> %3 to i8 19820 ret i8 %4 19821} 19822 19823define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { 19824; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: 19825; VLX: # %bb.0: # %entry 19826; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 19827; VLX-NEXT: kmovd %k0, %eax 19828; VLX-NEXT: # kill: def $al killed $al killed $eax 19829; VLX-NEXT: retq 19830; 19831; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: 19832; NoVLX: # %bb.0: # %entry 19833; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19834; 
NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 19835; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19836; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19837; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19838; NoVLX-NEXT: kmovw %k0, %eax 19839; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19840; NoVLX-NEXT: vzeroupper 19841; NoVLX-NEXT: retq 19842entry: 19843 %0 = bitcast <2 x i64> %__a to <4 x float> 19844 %load = load float, float* %__b 19845 %vec = insertelement <4 x float> undef, float %load, i32 0 19846 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19847 %2 = fcmp oeq <4 x float> %0, %1 19848 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19849 %4 = bitcast <8 x i1> %3 to i8 19850 ret i8 %4 19851} 19852 19853define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19854; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: 19855; VLX: # %bb.0: # %entry 19856; VLX-NEXT: kmovd %edi, %k1 19857; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 19858; VLX-NEXT: kmovd %k0, %eax 19859; VLX-NEXT: # kill: def $al killed $al killed $eax 19860; VLX-NEXT: retq 19861; 19862; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: 19863; NoVLX: # %bb.0: # %entry 19864; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19865; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19866; NoVLX-NEXT: kmovw %edi, %k1 19867; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 19868; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19869; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19870; NoVLX-NEXT: kmovw %k0, %eax 19871; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19872; NoVLX-NEXT: vzeroupper 19873; NoVLX-NEXT: retq 19874entry: 19875 %0 = bitcast <2 x i64> %__a to <4 x float> 19876 %1 = bitcast <2 x i64> %__b to <4 x float> 19877 %2 = fcmp oeq <4 x float> %0, %1 19878 %3 = bitcast i4 %__u to <4 x i1> 19879 %4 = and <4 x i1> %2, %3 19880 %5 = 
shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19881 %6 = bitcast <8 x i1> %5 to i8 19882 ret i8 %6 19883} 19884 19885define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 19886; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: 19887; VLX: # %bb.0: # %entry 19888; VLX-NEXT: kmovd %edi, %k1 19889; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 19890; VLX-NEXT: kmovd %k0, %eax 19891; VLX-NEXT: # kill: def $al killed $al killed $eax 19892; VLX-NEXT: retq 19893; 19894; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: 19895; NoVLX: # %bb.0: # %entry 19896; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19897; NoVLX-NEXT: kmovw %edi, %k1 19898; NoVLX-NEXT: vmovaps (%rsi), %xmm1 19899; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 19900; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19901; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19902; NoVLX-NEXT: kmovw %k0, %eax 19903; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19904; NoVLX-NEXT: vzeroupper 19905; NoVLX-NEXT: retq 19906entry: 19907 %0 = bitcast <2 x i64> %__a to <4 x float> 19908 %load = load <2 x i64>, <2 x i64>* %__b 19909 %1 = bitcast <2 x i64> %load to <4 x float> 19910 %2 = fcmp oeq <4 x float> %0, %1 19911 %3 = bitcast i4 %__u to <4 x i1> 19912 %4 = and <4 x i1> %2, %3 19913 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19914 %6 = bitcast <8 x i1> %5 to i8 19915 ret i8 %6 19916} 19917 19918define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { 19919; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: 19920; VLX: # %bb.0: # %entry 19921; VLX-NEXT: kmovd %edi, %k1 19922; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 19923; VLX-NEXT: kmovd %k0, %eax 19924; VLX-NEXT: # kill: def $al killed $al killed $eax 
19925; VLX-NEXT: retq 19926; 19927; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: 19928; NoVLX: # %bb.0: # %entry 19929; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19930; NoVLX-NEXT: kmovw %edi, %k1 19931; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 19932; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 19933; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19934; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19935; NoVLX-NEXT: kmovw %k0, %eax 19936; NoVLX-NEXT: # kill: def $al killed $al killed $eax 19937; NoVLX-NEXT: vzeroupper 19938; NoVLX-NEXT: retq 19939entry: 19940 %0 = bitcast <2 x i64> %__a to <4 x float> 19941 %load = load float, float* %__b 19942 %vec = insertelement <4 x float> undef, float %load, i32 0 19943 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19944 %2 = fcmp oeq <4 x float> %0, %1 19945 %3 = bitcast i4 %__u to <4 x i1> 19946 %4 = and <4 x i1> %2, %3 19947 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 19948 %6 = bitcast <8 x i1> %5 to i8 19949 ret i8 %6 19950} 19951 19952 19953 19954define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19955; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: 19956; VLX: # %bb.0: # %entry 19957; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 19958; VLX-NEXT: kmovd %k0, %eax 19959; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19960; VLX-NEXT: retq 19961; 19962; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: 19963; NoVLX: # %bb.0: # %entry 19964; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19965; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19966; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19967; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19968; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19969; NoVLX-NEXT: kmovw %k0, %eax 19970; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19971; NoVLX-NEXT: vzeroupper 19972; NoVLX-NEXT: retq 19973entry: 19974 %0 = bitcast <2 x 
i64> %__a to <4 x float> 19975 %1 = bitcast <2 x i64> %__b to <4 x float> 19976 %2 = fcmp oeq <4 x float> %0, %1 19977 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19978 %4 = bitcast <16 x i1> %3 to i16 19979 ret i16 %4 19980} 19981 19982define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 19983; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: 19984; VLX: # %bb.0: # %entry 19985; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 19986; VLX-NEXT: kmovd %k0, %eax 19987; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19988; VLX-NEXT: retq 19989; 19990; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: 19991; NoVLX: # %bb.0: # %entry 19992; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19993; NoVLX-NEXT: vmovaps (%rdi), %xmm1 19994; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19995; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19996; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19997; NoVLX-NEXT: kmovw %k0, %eax 19998; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19999; NoVLX-NEXT: vzeroupper 20000; NoVLX-NEXT: retq 20001entry: 20002 %0 = bitcast <2 x i64> %__a to <4 x float> 20003 %load = load <2 x i64>, <2 x i64>* %__b 20004 %1 = bitcast <2 x i64> %load to <4 x float> 20005 %2 = fcmp oeq <4 x float> %0, %1 20006 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20007 %4 = bitcast <16 x i1> %3 to i16 20008 ret i16 %4 20009} 20010 20011define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { 20012; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: 20013; VLX: # %bb.0: # %entry 20014; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 20015; VLX-NEXT: kmovd %k0, %eax 20016; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20017; 
VLX-NEXT: retq 20018; 20019; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: 20020; NoVLX: # %bb.0: # %entry 20021; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20022; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 20023; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20024; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20025; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20026; NoVLX-NEXT: kmovw %k0, %eax 20027; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20028; NoVLX-NEXT: vzeroupper 20029; NoVLX-NEXT: retq 20030entry: 20031 %0 = bitcast <2 x i64> %__a to <4 x float> 20032 %load = load float, float* %__b 20033 %vec = insertelement <4 x float> undef, float %load, i32 0 20034 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20035 %2 = fcmp oeq <4 x float> %0, %1 20036 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20037 %4 = bitcast <16 x i1> %3 to i16 20038 ret i16 %4 20039} 20040 20041define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 20042; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: 20043; VLX: # %bb.0: # %entry 20044; VLX-NEXT: kmovd %edi, %k1 20045; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 20046; VLX-NEXT: kmovd %k0, %eax 20047; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20048; VLX-NEXT: retq 20049; 20050; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: 20051; NoVLX: # %bb.0: # %entry 20052; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 20053; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20054; NoVLX-NEXT: kmovw %edi, %k1 20055; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20056; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20057; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20058; NoVLX-NEXT: kmovw %k0, %eax 20059; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20060; NoVLX-NEXT: vzeroupper 20061; 
NoVLX-NEXT: retq 20062entry: 20063 %0 = bitcast <2 x i64> %__a to <4 x float> 20064 %1 = bitcast <2 x i64> %__b to <4 x float> 20065 %2 = fcmp oeq <4 x float> %0, %1 20066 %3 = bitcast i4 %__u to <4 x i1> 20067 %4 = and <4 x i1> %2, %3 20068 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20069 %6 = bitcast <16 x i1> %5 to i16 20070 ret i16 %6 20071} 20072 20073define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 20074; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: 20075; VLX: # %bb.0: # %entry 20076; VLX-NEXT: kmovd %edi, %k1 20077; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 20078; VLX-NEXT: kmovd %k0, %eax 20079; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20080; VLX-NEXT: retq 20081; 20082; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: 20083; NoVLX: # %bb.0: # %entry 20084; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20085; NoVLX-NEXT: kmovw %edi, %k1 20086; NoVLX-NEXT: vmovaps (%rsi), %xmm1 20087; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20088; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20089; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20090; NoVLX-NEXT: kmovw %k0, %eax 20091; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20092; NoVLX-NEXT: vzeroupper 20093; NoVLX-NEXT: retq 20094entry: 20095 %0 = bitcast <2 x i64> %__a to <4 x float> 20096 %load = load <2 x i64>, <2 x i64>* %__b 20097 %1 = bitcast <2 x i64> %load to <4 x float> 20098 %2 = fcmp oeq <4 x float> %0, %1 20099 %3 = bitcast i4 %__u to <4 x i1> 20100 %4 = and <4 x i1> %2, %3 20101 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20102 %6 = bitcast <16 x i1> %5 to i16 20103 ret i16 %6 20104} 20105 20106define zeroext i16 
@test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { 20107; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: 20108; VLX: # %bb.0: # %entry 20109; VLX-NEXT: kmovd %edi, %k1 20110; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 20111; VLX-NEXT: kmovd %k0, %eax 20112; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20113; VLX-NEXT: retq 20114; 20115; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: 20116; NoVLX: # %bb.0: # %entry 20117; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20118; NoVLX-NEXT: kmovw %edi, %k1 20119; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 20120; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20121; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20122; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20123; NoVLX-NEXT: kmovw %k0, %eax 20124; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20125; NoVLX-NEXT: vzeroupper 20126; NoVLX-NEXT: retq 20127entry: 20128 %0 = bitcast <2 x i64> %__a to <4 x float> 20129 %load = load float, float* %__b 20130 %vec = insertelement <4 x float> undef, float %load, i32 0 20131 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20132 %2 = fcmp oeq <4 x float> %0, %1 20133 %3 = bitcast i4 %__u to <4 x i1> 20134 %4 = and <4 x i1> %2, %3 20135 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20136 %6 = bitcast <16 x i1> %5 to i16 20137 ret i16 %6 20138} 20139 20140 20141 20142define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 20143; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: 20144; VLX: # %bb.0: # %entry 20145; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 20146; VLX-NEXT: kmovd %k0, %eax 20147; VLX-NEXT: retq 20148; 20149; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: 20150; NoVLX: # %bb.0: # %entry 20151; NoVLX-NEXT: # kill: def $xmm1 killed 
$xmm1 def $zmm1 20152; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20153; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20154; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20155; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20156; NoVLX-NEXT: kmovw %k0, %eax 20157; NoVLX-NEXT: vzeroupper 20158; NoVLX-NEXT: retq 20159entry: 20160 %0 = bitcast <2 x i64> %__a to <4 x float> 20161 %1 = bitcast <2 x i64> %__b to <4 x float> 20162 %2 = fcmp oeq <4 x float> %0, %1 20163 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20164 %4 = bitcast <32 x i1> %3 to i32 20165 ret i32 %4 20166} 20167 20168define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 20169; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: 20170; VLX: # %bb.0: # %entry 20171; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 20172; VLX-NEXT: kmovd %k0, %eax 20173; VLX-NEXT: retq 20174; 20175; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: 20176; NoVLX: # %bb.0: # %entry 20177; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20178; NoVLX-NEXT: vmovaps (%rdi), %xmm1 20179; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20180; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20181; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20182; NoVLX-NEXT: kmovw %k0, %eax 20183; NoVLX-NEXT: vzeroupper 20184; NoVLX-NEXT: retq 20185entry: 20186 %0 = bitcast <2 x i64> %__a to <4 x float> 20187 %load = load <2 x i64>, <2 x i64>* %__b 20188 %1 = bitcast <2 x i64> %load to <4 x float> 20189 %2 = fcmp oeq <4 x float> %0, %1 20190 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 
4, i32 5, i32 6, i32 7> 20191 %4 = bitcast <32 x i1> %3 to i32 20192 ret i32 %4 20193} 20194 20195define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { 20196; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: 20197; VLX: # %bb.0: # %entry 20198; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 20199; VLX-NEXT: kmovd %k0, %eax 20200; VLX-NEXT: retq 20201; 20202; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: 20203; NoVLX: # %bb.0: # %entry 20204; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20205; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 20206; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20207; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20208; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20209; NoVLX-NEXT: kmovw %k0, %eax 20210; NoVLX-NEXT: vzeroupper 20211; NoVLX-NEXT: retq 20212entry: 20213 %0 = bitcast <2 x i64> %__a to <4 x float> 20214 %load = load float, float* %__b 20215 %vec = insertelement <4 x float> undef, float %load, i32 0 20216 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20217 %2 = fcmp oeq <4 x float> %0, %1 20218 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20219 %4 = bitcast <32 x i1> %3 to i32 20220 ret i32 %4 20221} 20222 20223define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 20224; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: 20225; VLX: # %bb.0: # %entry 20226; VLX-NEXT: kmovd %edi, %k1 20227; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 20228; VLX-NEXT: kmovd %k0, %eax 20229; VLX-NEXT: retq 20230; 20231; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: 20232; NoVLX: # %bb.0: # %entry 20233; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 
def $zmm1 20234; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20235; NoVLX-NEXT: kmovw %edi, %k1 20236; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20237; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20238; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20239; NoVLX-NEXT: kmovw %k0, %eax 20240; NoVLX-NEXT: vzeroupper 20241; NoVLX-NEXT: retq 20242entry: 20243 %0 = bitcast <2 x i64> %__a to <4 x float> 20244 %1 = bitcast <2 x i64> %__b to <4 x float> 20245 %2 = fcmp oeq <4 x float> %0, %1 20246 %3 = bitcast i4 %__u to <4 x i1> 20247 %4 = and <4 x i1> %2, %3 20248 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20249 %6 = bitcast <32 x i1> %5 to i32 20250 ret i32 %6 20251} 20252 20253define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 20254; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: 20255; VLX: # %bb.0: # %entry 20256; VLX-NEXT: kmovd %edi, %k1 20257; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 20258; VLX-NEXT: kmovd %k0, %eax 20259; VLX-NEXT: retq 20260; 20261; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: 20262; NoVLX: # %bb.0: # %entry 20263; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20264; NoVLX-NEXT: kmovw %edi, %k1 20265; NoVLX-NEXT: vmovaps (%rsi), %xmm1 20266; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20267; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20268; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20269; NoVLX-NEXT: kmovw %k0, %eax 20270; NoVLX-NEXT: vzeroupper 20271; NoVLX-NEXT: retq 20272entry: 20273 %0 = bitcast <2 x i64> %__a to <4 x float> 20274 %load = load <2 x i64>, <2 x i64>* %__b 20275 %1 = bitcast <2 x i64> %load to <4 x float> 20276 %2 = fcmp oeq <4 x float> %0, %1 20277 %3 = bitcast i4 %__u to <4 x i1> 20278 %4 = and 
<4 x i1> %2, %3 20279 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20280 %6 = bitcast <32 x i1> %5 to i32 20281 ret i32 %6 20282} 20283 20284define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { 20285; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: 20286; VLX: # %bb.0: # %entry 20287; VLX-NEXT: kmovd %edi, %k1 20288; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 20289; VLX-NEXT: kmovd %k0, %eax 20290; VLX-NEXT: retq 20291; 20292; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: 20293; NoVLX: # %bb.0: # %entry 20294; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20295; NoVLX-NEXT: kmovw %edi, %k1 20296; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 20297; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20298; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20299; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20300; NoVLX-NEXT: kmovw %k0, %eax 20301; NoVLX-NEXT: vzeroupper 20302; NoVLX-NEXT: retq 20303entry: 20304 %0 = bitcast <2 x i64> %__a to <4 x float> 20305 %load = load float, float* %__b 20306 %vec = insertelement <4 x float> undef, float %load, i32 0 20307 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20308 %2 = fcmp oeq <4 x float> %0, %1 20309 %3 = bitcast i4 %__u to <4 x i1> 20310 %4 = and <4 x i1> %2, %3 20311 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20312 %6 = bitcast <32 x i1> %5 to i32 20313 ret i32 %6 20314} 20315 20316 20317 20318define 
zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 20319; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: 20320; VLX: # %bb.0: # %entry 20321; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 20322; VLX-NEXT: kmovq %k0, %rax 20323; VLX-NEXT: retq 20324; 20325; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: 20326; NoVLX: # %bb.0: # %entry 20327; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 20328; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20329; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20330; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20331; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20332; NoVLX-NEXT: kmovw %k0, %eax 20333; NoVLX-NEXT: movzwl %ax, %eax 20334; NoVLX-NEXT: vzeroupper 20335; NoVLX-NEXT: retq 20336entry: 20337 %0 = bitcast <2 x i64> %__a to <4 x float> 20338 %1 = bitcast <2 x i64> %__b to <4 x float> 20339 %2 = fcmp oeq <4 x float> %0, %1 20340 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20341 %4 = bitcast <64 x i1> %3 to i64 20342 ret i64 %4 20343} 20344 20345define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 20346; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: 20347; VLX: # %bb.0: # %entry 20348; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 20349; VLX-NEXT: kmovq %k0, %rax 20350; VLX-NEXT: retq 20351; 20352; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: 20353; NoVLX: # %bb.0: # %entry 20354; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20355; NoVLX-NEXT: vmovaps (%rdi), %xmm1 20356; NoVLX-NEXT: 
vcmpeqps %zmm1, %zmm0, %k0 20357; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20358; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20359; NoVLX-NEXT: kmovw %k0, %eax 20360; NoVLX-NEXT: movzwl %ax, %eax 20361; NoVLX-NEXT: vzeroupper 20362; NoVLX-NEXT: retq 20363entry: 20364 %0 = bitcast <2 x i64> %__a to <4 x float> 20365 %load = load <2 x i64>, <2 x i64>* %__b 20366 %1 = bitcast <2 x i64> %load to <4 x float> 20367 %2 = fcmp oeq <4 x float> %0, %1 20368 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20369 %4 = bitcast <64 x i1> %3 to i64 20370 ret i64 %4 20371} 20372 20373define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { 20374; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20375; VLX: # %bb.0: # %entry 20376; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 20377; VLX-NEXT: kmovq %k0, %rax 20378; VLX-NEXT: retq 20379; 20380; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20381; NoVLX: # %bb.0: # %entry 20382; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20383; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 20384; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20385; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20386; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20387; NoVLX-NEXT: kmovw %k0, %eax 20388; NoVLX-NEXT: movzwl %ax, %eax 20389; NoVLX-NEXT: vzeroupper 20390; NoVLX-NEXT: retq 20391entry: 20392 %0 = bitcast <2 x i64> %__a to <4 x float> 20393 %load = load float, float* %__b 20394 %vec = insertelement <4 x float> undef, float %load, i32 0 20395 %1 = shufflevector <4 x float> %vec, <4 x 
float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20396 %2 = fcmp oeq <4 x float> %0, %1 20397 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20398 %4 = bitcast <64 x i1> %3 to i64 20399 ret i64 %4 20400} 20401 20402define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 20403; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: 20404; VLX: # %bb.0: # %entry 20405; VLX-NEXT: kmovd %edi, %k1 20406; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 20407; VLX-NEXT: kmovq %k0, %rax 20408; VLX-NEXT: retq 20409; 20410; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: 20411; NoVLX: # %bb.0: # %entry 20412; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 20413; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20414; NoVLX-NEXT: kmovw %edi, %k1 20415; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20416; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20417; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20418; NoVLX-NEXT: kmovw %k0, %eax 20419; NoVLX-NEXT: movzwl %ax, %eax 20420; NoVLX-NEXT: vzeroupper 20421; NoVLX-NEXT: retq 20422entry: 20423 %0 = bitcast <2 x i64> %__a to <4 x float> 20424 %1 = bitcast <2 x i64> %__b to <4 x float> 20425 %2 = fcmp oeq <4 x float> %0, %1 20426 %3 = bitcast i4 %__u to <4 x i1> 20427 %4 = and <4 x i1> %2, %3 20428 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20429 %6 = bitcast <64 x i1> %5 to i64 20430 ret i64 %6 20431} 20432 20433define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 20434; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: 20435; VLX: # %bb.0: # %entry 20436; VLX-NEXT: kmovd %edi, %k1 20437; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 20438; VLX-NEXT: kmovq %k0, %rax 20439; VLX-NEXT: retq 20440; 20441; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: 20442; NoVLX: # %bb.0: # %entry 20443; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20444; NoVLX-NEXT: kmovw %edi, %k1 20445; NoVLX-NEXT: vmovaps (%rsi), %xmm1 20446; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20447; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20448; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20449; NoVLX-NEXT: kmovw %k0, %eax 20450; NoVLX-NEXT: movzwl %ax, %eax 20451; NoVLX-NEXT: vzeroupper 20452; NoVLX-NEXT: retq 20453entry: 20454 %0 = bitcast <2 x i64> %__a to <4 x float> 20455 %load = load <2 x i64>, <2 x i64>* %__b 20456 %1 = bitcast <2 x i64> %load to <4 x float> 20457 %2 = fcmp oeq <4 x float> %0, %1 20458 %3 = bitcast i4 %__u to <4 x i1> 20459 %4 = and <4 x i1> %2, %3 20460 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7> 20461 %6 = bitcast <64 x i1> %5 to i64 20462 ret i64 %6 20463} 20464 20465define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr { 20466; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20467; VLX: # %bb.0: # %entry 20468; VLX-NEXT: kmovd %edi, %k1 20469; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 20470; VLX-NEXT: kmovq %k0, %rax 20471; VLX-NEXT: retq 20472; 20473; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20474; NoVLX: # %bb.0: # %entry 20475; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20476; NoVLX-NEXT: kmovw %edi, %k1 20477; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 20478; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20479; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20480; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20481; NoVLX-NEXT: kmovw %k0, %eax 20482; NoVLX-NEXT: movzwl %ax, %eax 20483; NoVLX-NEXT: vzeroupper 20484; NoVLX-NEXT: retq 20485entry: 20486 %0 = bitcast <2 x i64> %__a to <4 x float> 20487 %load = load float, float* %__b 20488 %vec = insertelement <4 x float> undef, float %load, i32 0 20489 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20490 %2 = fcmp oeq <4 x float> %0, %1 20491 %3 = bitcast i4 %__u to <4 x i1> 20492 %4 = and <4 x i1> %2, %3 20493 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20494 %6 = bitcast <64 x i1> %5 to i64 20495 ret i64 %6 20496} 20497 20498 20499 20500define zeroext i16 
@test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20501; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: 20502; VLX: # %bb.0: # %entry 20503; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 20504; VLX-NEXT: kmovd %k0, %eax 20505; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20506; VLX-NEXT: vzeroupper 20507; VLX-NEXT: retq 20508; 20509; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask: 20510; NoVLX: # %bb.0: # %entry 20511; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20512; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20513; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20514; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20515; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20516; NoVLX-NEXT: kmovw %k0, %eax 20517; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20518; NoVLX-NEXT: vzeroupper 20519; NoVLX-NEXT: retq 20520entry: 20521 %0 = bitcast <4 x i64> %__a to <8 x float> 20522 %1 = bitcast <4 x i64> %__b to <8 x float> 20523 %2 = fcmp oeq <8 x float> %0, %1 20524 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20525 %4 = bitcast <16 x i1> %3 to i16 20526 ret i16 %4 20527} 20528 20529define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20530; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: 20531; VLX: # %bb.0: # %entry 20532; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 20533; VLX-NEXT: kmovd %k0, %eax 20534; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20535; VLX-NEXT: vzeroupper 20536; VLX-NEXT: retq 20537; 20538; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem: 20539; NoVLX: # %bb.0: # %entry 20540; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20541; NoVLX-NEXT: vmovaps (%rdi), %ymm1 20542; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20543; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20544; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20545; NoVLX-NEXT: kmovw %k0, %eax 20546; 
NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20547; NoVLX-NEXT: vzeroupper 20548; NoVLX-NEXT: retq 20549entry: 20550 %0 = bitcast <4 x i64> %__a to <8 x float> 20551 %load = load <4 x i64>, <4 x i64>* %__b 20552 %1 = bitcast <4 x i64> %load to <8 x float> 20553 %2 = fcmp oeq <8 x float> %0, %1 20554 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20555 %4 = bitcast <16 x i1> %3 to i16 20556 ret i16 %4 20557} 20558 20559define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { 20560; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20561; VLX: # %bb.0: # %entry 20562; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 20563; VLX-NEXT: kmovd %k0, %eax 20564; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20565; VLX-NEXT: vzeroupper 20566; VLX-NEXT: retq 20567; 20568; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20569; NoVLX: # %bb.0: # %entry 20570; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20571; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 20572; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20573; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20574; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20575; NoVLX-NEXT: kmovw %k0, %eax 20576; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20577; NoVLX-NEXT: vzeroupper 20578; NoVLX-NEXT: retq 20579entry: 20580 %0 = bitcast <4 x i64> %__a to <8 x float> 20581 %load = load float, float* %__b 20582 %vec = insertelement <8 x float> undef, float %load, i32 0 20583 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20584 %2 = fcmp oeq <8 x float> %0, %1 20585 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20586 %4 = bitcast <16 x i1> %3 to i16 
20587 ret i16 %4 20588} 20589 20590define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20591; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: 20592; VLX: # %bb.0: # %entry 20593; VLX-NEXT: kmovd %edi, %k1 20594; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20595; VLX-NEXT: kmovd %k0, %eax 20596; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20597; VLX-NEXT: vzeroupper 20598; VLX-NEXT: retq 20599; 20600; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: 20601; NoVLX: # %bb.0: # %entry 20602; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20603; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20604; NoVLX-NEXT: kmovw %edi, %k1 20605; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20606; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20607; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20608; NoVLX-NEXT: kmovw %k0, %eax 20609; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20610; NoVLX-NEXT: vzeroupper 20611; NoVLX-NEXT: retq 20612entry: 20613 %0 = bitcast <4 x i64> %__a to <8 x float> 20614 %1 = bitcast <4 x i64> %__b to <8 x float> 20615 %2 = fcmp oeq <8 x float> %0, %1 20616 %3 = bitcast i8 %__u to <8 x i1> 20617 %4 = and <8 x i1> %2, %3 20618 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20619 %6 = bitcast <16 x i1> %5 to i16 20620 ret i16 %6 20621} 20622 20623define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20624; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: 20625; VLX: # %bb.0: # %entry 20626; VLX-NEXT: kmovd %edi, %k1 20627; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 20628; VLX-NEXT: kmovd %k0, %eax 20629; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20630; VLX-NEXT: vzeroupper 20631; VLX-NEXT: retq 20632; 20633; NoVLX-LABEL: 
test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: 20634; NoVLX: # %bb.0: # %entry 20635; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20636; NoVLX-NEXT: vmovaps (%rsi), %ymm1 20637; NoVLX-NEXT: kmovw %edi, %k1 20638; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20639; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20640; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20641; NoVLX-NEXT: kmovw %k0, %eax 20642; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20643; NoVLX-NEXT: vzeroupper 20644; NoVLX-NEXT: retq 20645entry: 20646 %0 = bitcast <4 x i64> %__a to <8 x float> 20647 %load = load <4 x i64>, <4 x i64>* %__b 20648 %1 = bitcast <4 x i64> %load to <8 x float> 20649 %2 = fcmp oeq <8 x float> %0, %1 20650 %3 = bitcast i8 %__u to <8 x i1> 20651 %4 = and <8 x i1> %2, %3 20652 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20653 %6 = bitcast <16 x i1> %5 to i16 20654 ret i16 %6 20655} 20656 20657define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr { 20658; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20659; VLX: # %bb.0: # %entry 20660; VLX-NEXT: kmovd %edi, %k1 20661; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 20662; VLX-NEXT: kmovd %k0, %eax 20663; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20664; VLX-NEXT: vzeroupper 20665; VLX-NEXT: retq 20666; 20667; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20668; NoVLX: # %bb.0: # %entry 20669; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20670; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 20671; NoVLX-NEXT: kmovw %edi, %k1 20672; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20673; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20674; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20675; NoVLX-NEXT: kmovw %k0, %eax 20676; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20677; NoVLX-NEXT: vzeroupper 20678; NoVLX-NEXT: 
retq 20679entry: 20680 %0 = bitcast <4 x i64> %__a to <8 x float> 20681 %load = load float, float* %__b 20682 %vec = insertelement <8 x float> undef, float %load, i32 0 20683 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20684 %2 = fcmp oeq <8 x float> %0, %1 20685 %3 = bitcast i8 %__u to <8 x i1> 20686 %4 = and <8 x i1> %2, %3 20687 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20688 %6 = bitcast <16 x i1> %5 to i16 20689 ret i16 %6 20690} 20691 20692 20693 20694define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20695; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: 20696; VLX: # %bb.0: # %entry 20697; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 20698; VLX-NEXT: kmovd %k0, %eax 20699; VLX-NEXT: vzeroupper 20700; VLX-NEXT: retq 20701; 20702; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: 20703; NoVLX: # %bb.0: # %entry 20704; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20705; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20706; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20707; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20708; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20709; NoVLX-NEXT: kmovw %k0, %eax 20710; NoVLX-NEXT: vzeroupper 20711; NoVLX-NEXT: retq 20712entry: 20713 %0 = bitcast <4 x i64> %__a to <8 x float> 20714 %1 = bitcast <4 x i64> %__b to <8 x float> 20715 %2 = fcmp oeq <8 x float> %0, %1 20716 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20717 %4 = bitcast <32 x i1> %3 to i32 20718 ret i32 %4 20719} 20720 20721define zeroext i32 
@test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20722; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: 20723; VLX: # %bb.0: # %entry 20724; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 20725; VLX-NEXT: kmovd %k0, %eax 20726; VLX-NEXT: vzeroupper 20727; VLX-NEXT: retq 20728; 20729; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: 20730; NoVLX: # %bb.0: # %entry 20731; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20732; NoVLX-NEXT: vmovaps (%rdi), %ymm1 20733; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20734; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20735; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20736; NoVLX-NEXT: kmovw %k0, %eax 20737; NoVLX-NEXT: vzeroupper 20738; NoVLX-NEXT: retq 20739entry: 20740 %0 = bitcast <4 x i64> %__a to <8 x float> 20741 %load = load <4 x i64>, <4 x i64>* %__b 20742 %1 = bitcast <4 x i64> %load to <8 x float> 20743 %2 = fcmp oeq <8 x float> %0, %1 20744 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20745 %4 = bitcast <32 x i1> %3 to i32 20746 ret i32 %4 20747} 20748 20749define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { 20750; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20751; VLX: # %bb.0: # %entry 20752; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 20753; VLX-NEXT: kmovd %k0, %eax 20754; VLX-NEXT: vzeroupper 20755; VLX-NEXT: retq 20756; 20757; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20758; NoVLX: # %bb.0: # %entry 20759; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20760; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 20761; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20762; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20763; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20764; NoVLX-NEXT: kmovw %k0, %eax 
20765; NoVLX-NEXT: vzeroupper 20766; NoVLX-NEXT: retq 20767entry: 20768 %0 = bitcast <4 x i64> %__a to <8 x float> 20769 %load = load float, float* %__b 20770 %vec = insertelement <8 x float> undef, float %load, i32 0 20771 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20772 %2 = fcmp oeq <8 x float> %0, %1 20773 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20774 %4 = bitcast <32 x i1> %3 to i32 20775 ret i32 %4 20776} 20777 20778define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20779; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: 20780; VLX: # %bb.0: # %entry 20781; VLX-NEXT: kmovd %edi, %k1 20782; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20783; VLX-NEXT: kmovd %k0, %eax 20784; VLX-NEXT: vzeroupper 20785; VLX-NEXT: retq 20786; 20787; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: 20788; NoVLX: # %bb.0: # %entry 20789; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20790; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20791; NoVLX-NEXT: kmovw %edi, %k1 20792; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20793; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20794; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20795; NoVLX-NEXT: kmovw %k0, %eax 20796; NoVLX-NEXT: vzeroupper 20797; NoVLX-NEXT: retq 20798entry: 20799 %0 = bitcast <4 x i64> %__a to <8 x float> 20800 %1 = bitcast <4 x i64> %__b to <8 x float> 20801 %2 = fcmp oeq <8 x float> %0, %1 20802 %3 = bitcast i8 %__u to <8 x i1> 20803 %4 = and <8 x i1> %2, %3 20804 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20805 %6 = bitcast <32 x i1> %5 to i32 20806 ret i32 %6 20807} 20808 20809define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20810; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: 20811; VLX: # %bb.0: # %entry 20812; VLX-NEXT: kmovd %edi, %k1 20813; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 20814; VLX-NEXT: kmovd %k0, %eax 20815; VLX-NEXT: vzeroupper 20816; VLX-NEXT: retq 20817; 20818; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: 20819; NoVLX: # %bb.0: # %entry 20820; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20821; NoVLX-NEXT: vmovaps (%rsi), %ymm1 20822; NoVLX-NEXT: kmovw %edi, %k1 20823; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20824; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20825; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20826; NoVLX-NEXT: kmovw %k0, %eax 20827; NoVLX-NEXT: vzeroupper 20828; NoVLX-NEXT: retq 20829entry: 20830 %0 = bitcast <4 x i64> %__a to <8 x float> 20831 %load = load <4 x i64>, <4 x i64>* %__b 20832 %1 = bitcast <4 x i64> %load to <8 x float> 20833 %2 = fcmp oeq <8 x float> %0, %1 20834 %3 = bitcast i8 %__u to <8 x i1> 20835 %4 = and <8 x i1> %2, %3 20836 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20837 %6 = bitcast <32 x i1> %5 to i32 20838 ret i32 %6 20839} 20840 20841define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr { 20842; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20843; VLX: # %bb.0: # %entry 20844; VLX-NEXT: 
kmovd %edi, %k1 20845; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 20846; VLX-NEXT: kmovd %k0, %eax 20847; VLX-NEXT: vzeroupper 20848; VLX-NEXT: retq 20849; 20850; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20851; NoVLX: # %bb.0: # %entry 20852; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20853; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 20854; NoVLX-NEXT: kmovw %edi, %k1 20855; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20856; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20857; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20858; NoVLX-NEXT: kmovw %k0, %eax 20859; NoVLX-NEXT: vzeroupper 20860; NoVLX-NEXT: retq 20861entry: 20862 %0 = bitcast <4 x i64> %__a to <8 x float> 20863 %load = load float, float* %__b 20864 %vec = insertelement <8 x float> undef, float %load, i32 0 20865 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20866 %2 = fcmp oeq <8 x float> %0, %1 20867 %3 = bitcast i8 %__u to <8 x i1> 20868 %4 = and <8 x i1> %2, %3 20869 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20870 %6 = bitcast <32 x i1> %5 to i32 20871 ret i32 %6 20872} 20873 20874 20875 20876define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20877; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: 20878; VLX: # %bb.0: # %entry 20879; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 20880; VLX-NEXT: kmovq %k0, %rax 20881; VLX-NEXT: vzeroupper 20882; VLX-NEXT: retq 20883; 20884; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: 20885; NoVLX: # %bb.0: # %entry 20886; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20887; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20888; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 
20889; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20890; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20891; NoVLX-NEXT: kmovw %k0, %eax 20892; NoVLX-NEXT: movzwl %ax, %eax 20893; NoVLX-NEXT: vzeroupper 20894; NoVLX-NEXT: retq 20895entry: 20896 %0 = bitcast <4 x i64> %__a to <8 x float> 20897 %1 = bitcast <4 x i64> %__b to <8 x float> 20898 %2 = fcmp oeq <8 x float> %0, %1 20899 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20900 %4 = bitcast <64 x i1> %3 to i64 20901 ret i64 %4 20902} 20903 20904define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20905; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: 20906; VLX: # %bb.0: # %entry 20907; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 20908; VLX-NEXT: kmovq %k0, %rax 20909; VLX-NEXT: vzeroupper 20910; VLX-NEXT: retq 20911; 20912; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: 20913; NoVLX: # %bb.0: # %entry 20914; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20915; NoVLX-NEXT: vmovaps (%rdi), %ymm1 20916; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20917; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20918; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20919; NoVLX-NEXT: kmovw %k0, %eax 20920; NoVLX-NEXT: movzwl %ax, %eax 20921; NoVLX-NEXT: vzeroupper 20922; NoVLX-NEXT: retq 20923entry: 20924 %0 = bitcast <4 x i64> %__a to <8 x float> 20925 %load = load <4 x i64>, <4 x i64>* %__b 20926 %1 = bitcast <4 x i64> %load to <8 x float> 20927 %2 = fcmp oeq <8 x float> %0, %1 20928 %3 = shufflevector <8 x i1> %2, 
<8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20929 %4 = bitcast <64 x i1> %3 to i64 20930 ret i64 %4 20931} 20932 20933define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { 20934; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20935; VLX: # %bb.0: # %entry 20936; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 20937; VLX-NEXT: kmovq %k0, %rax 20938; VLX-NEXT: vzeroupper 20939; VLX-NEXT: retq 20940; 20941; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20942; NoVLX: # %bb.0: # %entry 20943; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20944; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 20945; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20946; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20947; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20948; NoVLX-NEXT: kmovw %k0, %eax 20949; NoVLX-NEXT: movzwl %ax, %eax 20950; NoVLX-NEXT: vzeroupper 20951; NoVLX-NEXT: retq 20952entry: 20953 %0 = bitcast <4 x i64> %__a to <8 x float> 20954 %load = load float, float* %__b 20955 %vec = insertelement <8 x float> undef, float %load, i32 0 20956 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20957 %2 = fcmp oeq <8 x float> %0, %1 20958 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 
11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20959 %4 = bitcast <64 x i1> %3 to i64 20960 ret i64 %4 20961} 20962 20963define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20964; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: 20965; VLX: # %bb.0: # %entry 20966; VLX-NEXT: kmovd %edi, %k1 20967; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20968; VLX-NEXT: kmovq %k0, %rax 20969; VLX-NEXT: vzeroupper 20970; VLX-NEXT: retq 20971; 20972; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: 20973; NoVLX: # %bb.0: # %entry 20974; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20975; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20976; NoVLX-NEXT: kmovw %edi, %k1 20977; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20978; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20979; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20980; NoVLX-NEXT: kmovw %k0, %eax 20981; NoVLX-NEXT: movzwl %ax, %eax 20982; NoVLX-NEXT: vzeroupper 20983; NoVLX-NEXT: retq 20984entry: 20985 %0 = bitcast <4 x i64> %__a to <8 x float> 20986 %1 = bitcast <4 x i64> %__b to <8 x float> 20987 %2 = fcmp oeq <8 x float> %0, %1 20988 %3 = bitcast i8 %__u to <8 x i1> 20989 %4 = and <8 x i1> %2, %3 20990 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 
11, i32 12, i32 13, i32 14, i32 15> 20991 %6 = bitcast <64 x i1> %5 to i64 20992 ret i64 %6 20993} 20994 20995define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 20996; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: 20997; VLX: # %bb.0: # %entry 20998; VLX-NEXT: kmovd %edi, %k1 20999; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 21000; VLX-NEXT: kmovq %k0, %rax 21001; VLX-NEXT: vzeroupper 21002; VLX-NEXT: retq 21003; 21004; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: 21005; NoVLX: # %bb.0: # %entry 21006; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 21007; NoVLX-NEXT: vmovaps (%rsi), %ymm1 21008; NoVLX-NEXT: kmovw %edi, %k1 21009; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 21010; NoVLX-NEXT: kshiftlw $8, %k0, %k0 21011; NoVLX-NEXT: kshiftrw $8, %k0, %k0 21012; NoVLX-NEXT: kmovw %k0, %eax 21013; NoVLX-NEXT: movzwl %ax, %eax 21014; NoVLX-NEXT: vzeroupper 21015; NoVLX-NEXT: retq 21016entry: 21017 %0 = bitcast <4 x i64> %__a to <8 x float> 21018 %load = load <4 x i64>, <4 x i64>* %__b 21019 %1 = bitcast <4 x i64> %load to <8 x float> 21020 %2 = fcmp oeq <8 x float> %0, %1 21021 %3 = bitcast i8 %__u to <8 x i1> 21022 %4 = and <8 x i1> %2, %3 21023 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 21024 %6 = bitcast <64 x i1> %5 to i64 21025 ret i64 %6 21026} 21027 21028define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> 
%__a, float* %__b) local_unnamed_addr { 21029; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: 21030; VLX: # %bb.0: # %entry 21031; VLX-NEXT: kmovd %edi, %k1 21032; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 21033; VLX-NEXT: kmovq %k0, %rax 21034; VLX-NEXT: vzeroupper 21035; VLX-NEXT: retq 21036; 21037; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: 21038; NoVLX: # %bb.0: # %entry 21039; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 21040; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 21041; NoVLX-NEXT: kmovw %edi, %k1 21042; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 21043; NoVLX-NEXT: kshiftlw $8, %k0, %k0 21044; NoVLX-NEXT: kshiftrw $8, %k0, %k0 21045; NoVLX-NEXT: kmovw %k0, %eax 21046; NoVLX-NEXT: movzwl %ax, %eax 21047; NoVLX-NEXT: vzeroupper 21048; NoVLX-NEXT: retq 21049entry: 21050 %0 = bitcast <4 x i64> %__a to <8 x float> 21051 %load = load float, float* %__b 21052 %vec = insertelement <8 x float> undef, float %load, i32 0 21053 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 21054 %2 = fcmp oeq <8 x float> %0, %1 21055 %3 = bitcast i8 %__u to <8 x i1> 21056 %4 = and <8 x i1> %2, %3 21057 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 21058 %6 = bitcast <64 x i1> %5 to i64 21059 ret i64 %6 21060} 21061 21062 21063 21064define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21065; VLX-LABEL: 
test_vcmpoeqps_v16i1_v32i1_mask: 21066; VLX: # %bb.0: # %entry 21067; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 21068; VLX-NEXT: kmovd %k0, %eax 21069; VLX-NEXT: vzeroupper 21070; VLX-NEXT: retq 21071; 21072; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: 21073; NoVLX: # %bb.0: # %entry 21074; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 21075; NoVLX-NEXT: kmovw %k0, %eax 21076; NoVLX-NEXT: vzeroupper 21077; NoVLX-NEXT: retq 21078entry: 21079 %0 = bitcast <8 x i64> %__a to <16 x float> 21080 %1 = bitcast <8 x i64> %__b to <16 x float> 21081 %2 = fcmp oeq <16 x float> %0, %1 21082 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 21083 %4 = bitcast <32 x i1> %3 to i32 21084 ret i32 %4 21085} 21086 21087define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 21088; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: 21089; VLX: # %bb.0: # %entry 21090; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 21091; VLX-NEXT: kmovd %k0, %eax 21092; VLX-NEXT: vzeroupper 21093; VLX-NEXT: retq 21094; 21095; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: 21096; NoVLX: # %bb.0: # %entry 21097; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 21098; NoVLX-NEXT: kmovw %k0, %eax 21099; NoVLX-NEXT: vzeroupper 21100; NoVLX-NEXT: retq 21101entry: 21102 %0 = bitcast <8 x i64> %__a to <16 x float> 21103 %load = load <8 x i64>, <8 x i64>* %__b 21104 %1 = bitcast <8 x i64> %load to <16 x float> 21105 %2 = fcmp oeq <16 x float> %0, %1 21106 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, 
i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 21107 %4 = bitcast <32 x i1> %3 to i32 21108 ret i32 %4 21109} 21110 21111define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { 21112; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: 21113; VLX: # %bb.0: # %entry 21114; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 21115; VLX-NEXT: kmovd %k0, %eax 21116; VLX-NEXT: vzeroupper 21117; VLX-NEXT: retq 21118; 21119; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: 21120; NoVLX: # %bb.0: # %entry 21121; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 21122; NoVLX-NEXT: kmovw %k0, %eax 21123; NoVLX-NEXT: vzeroupper 21124; NoVLX-NEXT: retq 21125entry: 21126 %0 = bitcast <8 x i64> %__a to <16 x float> 21127 %load = load float, float* %__b 21128 %vec = insertelement <16 x float> undef, float %load, i32 0 21129 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 21130 %2 = fcmp oeq <16 x float> %0, %1 21131 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 21132 %4 = bitcast <32 x i1> %3 to i32 21133 ret i32 %4 21134} 21135 21136define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21137; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: 21138; VLX: # %bb.0: # %entry 21139; VLX-NEXT: kmovd %edi, %k1 21140; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 21141; VLX-NEXT: kmovd %k0, %eax 21142; VLX-NEXT: vzeroupper 21143; VLX-NEXT: retq 21144; 21145; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: 21146; NoVLX: # %bb.0: # %entry 21147; NoVLX-NEXT: vcmpeqps 
%zmm1, %zmm0, %k0 21148; NoVLX-NEXT: kmovw %k0, %eax 21149; NoVLX-NEXT: andl %edi, %eax 21150; NoVLX-NEXT: vzeroupper 21151; NoVLX-NEXT: retq 21152entry: 21153 %0 = bitcast <8 x i64> %__a to <16 x float> 21154 %1 = bitcast <8 x i64> %__b to <16 x float> 21155 %2 = fcmp oeq <16 x float> %0, %1 21156 %3 = bitcast i16 %__u to <16 x i1> 21157 %4 = and <16 x i1> %2, %3 21158 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 21159 %6 = bitcast <32 x i1> %5 to i32 21160 ret i32 %6 21161} 21162 21163define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 21164; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: 21165; VLX: # %bb.0: # %entry 21166; VLX-NEXT: kmovd %edi, %k1 21167; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} 21168; VLX-NEXT: kmovd %k0, %eax 21169; VLX-NEXT: vzeroupper 21170; VLX-NEXT: retq 21171; 21172; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: 21173; NoVLX: # %bb.0: # %entry 21174; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 21175; NoVLX-NEXT: kmovw %k0, %eax 21176; NoVLX-NEXT: andl %edi, %eax 21177; NoVLX-NEXT: vzeroupper 21178; NoVLX-NEXT: retq 21179entry: 21180 %0 = bitcast <8 x i64> %__a to <16 x float> 21181 %load = load <8 x i64>, <8 x i64>* %__b 21182 %1 = bitcast <8 x i64> %load to <16 x float> 21183 %2 = fcmp oeq <16 x float> %0, %1 21184 %3 = bitcast i16 %__u to <16 x i1> 21185 %4 = and <16 x i1> %2, %3 21186 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, 
i32 28, i32 29, i32 30, i32 31> 21187 %6 = bitcast <32 x i1> %5 to i32 21188 ret i32 %6 21189} 21190 21191define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr { 21192; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: 21193; VLX: # %bb.0: # %entry 21194; VLX-NEXT: kmovd %edi, %k1 21195; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 21196; VLX-NEXT: kmovd %k0, %eax 21197; VLX-NEXT: vzeroupper 21198; VLX-NEXT: retq 21199; 21200; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: 21201; NoVLX: # %bb.0: # %entry 21202; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 21203; NoVLX-NEXT: kmovw %k0, %eax 21204; NoVLX-NEXT: andl %edi, %eax 21205; NoVLX-NEXT: vzeroupper 21206; NoVLX-NEXT: retq 21207entry: 21208 %0 = bitcast <8 x i64> %__a to <16 x float> 21209 %load = load float, float* %__b 21210 %vec = insertelement <16 x float> undef, float %load, i32 0 21211 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 21212 %2 = fcmp oeq <16 x float> %0, %1 21213 %3 = bitcast i16 %__u to <16 x i1> 21214 %4 = and <16 x i1> %2, %3 21215 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 21216 %6 = bitcast <32 x i1> %5 to i32 21217 ret i32 %6 21218} 21219 21220 21221 21222define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21223; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: 21224; CHECK: # %bb.0: # %entry 21225; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21226; CHECK-NEXT: kmovw %k0, %eax 21227; CHECK-NEXT: vzeroupper 21228; 
CHECK-NEXT: retq 21229entry: 21230 %0 = bitcast <8 x i64> %__a to <16 x float> 21231 %1 = bitcast <8 x i64> %__b to <16 x float> 21232 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) 21233 %3 = bitcast <16 x i1> %2 to i16 21234 %4 = zext i16 %3 to i32 21235 ret i32 %4 21236} 21237 21238define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21239; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: 21240; VLX: # %bb.0: # %entry 21241; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21242; VLX-NEXT: kmovd %k0, %eax 21243; VLX-NEXT: andl %edi, %eax 21244; VLX-NEXT: vzeroupper 21245; VLX-NEXT: retq 21246; 21247; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: 21248; NoVLX: # %bb.0: # %entry 21249; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21250; NoVLX-NEXT: kmovw %k0, %eax 21251; NoVLX-NEXT: andl %edi, %eax 21252; NoVLX-NEXT: vzeroupper 21253; NoVLX-NEXT: retq 21254entry: 21255 %0 = bitcast <8 x i64> %__a to <16 x float> 21256 %1 = bitcast <8 x i64> %__b to <16 x float> 21257 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) 21258 %3 = bitcast i16 %__u to <16 x i1> 21259 %4 = and <16 x i1> %2, %3 21260 %5 = bitcast <16 x i1> %4 to i16 21261 %6 = zext i16 %5 to i32 21262 ret i32 %6 21263} 21264 21265 21266 21267define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21268; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: 21269; VLX: # %bb.0: # %entry 21270; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 21271; VLX-NEXT: kmovq %k0, %rax 21272; VLX-NEXT: vzeroupper 21273; VLX-NEXT: retq 21274; 21275; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: 21276; NoVLX: # %bb.0: # %entry 21277; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 21278; NoVLX-NEXT: kmovw %k0, %eax 21279; NoVLX-NEXT: movzwl %ax, %eax 21280; NoVLX-NEXT: vzeroupper 21281; NoVLX-NEXT: retq 21282entry: 
21283 %0 = bitcast <8 x i64> %__a to <16 x float> 21284 %1 = bitcast <8 x i64> %__b to <16 x float> 21285 %2 = fcmp oeq <16 x float> %0, %1 21286 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 21287 %4 = bitcast <64 x i1> %3 to i64 21288 ret i64 %4 21289} 21290 21291define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 21292; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: 21293; VLX: # %bb.0: # %entry 21294; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 21295; VLX-NEXT: kmovq %k0, %rax 21296; VLX-NEXT: vzeroupper 21297; VLX-NEXT: retq 21298; 21299; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: 21300; NoVLX: # %bb.0: # %entry 21301; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 21302; NoVLX-NEXT: kmovw %k0, %eax 21303; NoVLX-NEXT: movzwl %ax, %eax 21304; NoVLX-NEXT: vzeroupper 21305; NoVLX-NEXT: retq 21306entry: 21307 %0 = bitcast <8 x i64> %__a to <16 x float> 21308 %load = load <8 x i64>, <8 x i64>* %__b 21309 %1 = bitcast <8 x i64> %load to <16 x float> 21310 %2 = fcmp oeq <16 x float> %0, %1 21311 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 
26,i32 27,i32 28,i32 29,i32 30,i32 31> 21312 %4 = bitcast <64 x i1> %3 to i64 21313 ret i64 %4 21314} 21315 21316define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { 21317; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: 21318; VLX: # %bb.0: # %entry 21319; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 21320; VLX-NEXT: kmovq %k0, %rax 21321; VLX-NEXT: vzeroupper 21322; VLX-NEXT: retq 21323; 21324; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: 21325; NoVLX: # %bb.0: # %entry 21326; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 21327; NoVLX-NEXT: kmovw %k0, %eax 21328; NoVLX-NEXT: movzwl %ax, %eax 21329; NoVLX-NEXT: vzeroupper 21330; NoVLX-NEXT: retq 21331entry: 21332 %0 = bitcast <8 x i64> %__a to <16 x float> 21333 %load = load float, float* %__b 21334 %vec = insertelement <16 x float> undef, float %load, i32 0 21335 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 21336 %2 = fcmp oeq <16 x float> %0, %1 21337 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 21338 %4 = bitcast <64 x i1> %3 to i64 21339 ret i64 %4 21340} 21341 21342define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21343; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: 21344; VLX: # %bb.0: # %entry 21345; VLX-NEXT: kmovd %edi, %k1 21346; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 
{%k1} 21347; VLX-NEXT: kmovq %k0, %rax 21348; VLX-NEXT: vzeroupper 21349; VLX-NEXT: retq 21350; 21351; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: 21352; NoVLX: # %bb.0: # %entry 21353; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 21354; NoVLX-NEXT: kmovw %k0, %eax 21355; NoVLX-NEXT: andl %edi, %eax 21356; NoVLX-NEXT: vzeroupper 21357; NoVLX-NEXT: retq 21358entry: 21359 %0 = bitcast <8 x i64> %__a to <16 x float> 21360 %1 = bitcast <8 x i64> %__b to <16 x float> 21361 %2 = fcmp oeq <16 x float> %0, %1 21362 %3 = bitcast i16 %__u to <16 x i1> 21363 %4 = and <16 x i1> %2, %3 21364 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 21365 %6 = bitcast <64 x i1> %5 to i64 21366 ret i64 %6 21367} 21368 21369define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 21370; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: 21371; VLX: # %bb.0: # %entry 21372; VLX-NEXT: kmovd %edi, %k1 21373; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} 21374; VLX-NEXT: kmovq %k0, %rax 21375; VLX-NEXT: vzeroupper 21376; VLX-NEXT: retq 21377; 21378; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: 21379; NoVLX: # %bb.0: # %entry 21380; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 21381; NoVLX-NEXT: kmovw %k0, %eax 21382; NoVLX-NEXT: andl %edi, %eax 21383; NoVLX-NEXT: vzeroupper 21384; NoVLX-NEXT: retq 21385entry: 21386 %0 = bitcast <8 x i64> %__a to <16 x float> 21387 %load = load <8 x i64>, <8 x i64>* %__b 21388 %1 = bitcast <8 x i64> %load to <16 x 
float> 21389 %2 = fcmp oeq <16 x float> %0, %1 21390 %3 = bitcast i16 %__u to <16 x i1> 21391 %4 = and <16 x i1> %2, %3 21392 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 21393 %6 = bitcast <64 x i1> %5 to i64 21394 ret i64 %6 21395} 21396 21397define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr { 21398; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: 21399; VLX: # %bb.0: # %entry 21400; VLX-NEXT: kmovd %edi, %k1 21401; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 21402; VLX-NEXT: kmovq %k0, %rax 21403; VLX-NEXT: vzeroupper 21404; VLX-NEXT: retq 21405; 21406; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: 21407; NoVLX: # %bb.0: # %entry 21408; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 21409; NoVLX-NEXT: kmovw %k0, %eax 21410; NoVLX-NEXT: andl %edi, %eax 21411; NoVLX-NEXT: vzeroupper 21412; NoVLX-NEXT: retq 21413entry: 21414 %0 = bitcast <8 x i64> %__a to <16 x float> 21415 %load = load float, float* %__b 21416 %vec = insertelement <16 x float> undef, float %load, i32 0 21417 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 21418 %2 = fcmp oeq <16 x float> %0, %1 21419 %3 = bitcast i16 %__u to <16 x i1> 21420 %4 = and <16 x i1> %2, %3 21421 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 
9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 21422 %6 = bitcast <64 x i1> %5 to i64 21423 ret i64 %6 21424} 21425 21426 21427 21428define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21429; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: 21430; VLX: # %bb.0: # %entry 21431; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21432; VLX-NEXT: kmovd %k0, %eax 21433; VLX-NEXT: movzwl %ax, %eax 21434; VLX-NEXT: vzeroupper 21435; VLX-NEXT: retq 21436; 21437; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: 21438; NoVLX: # %bb.0: # %entry 21439; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21440; NoVLX-NEXT: kmovw %k0, %eax 21441; NoVLX-NEXT: movzwl %ax, %eax 21442; NoVLX-NEXT: vzeroupper 21443; NoVLX-NEXT: retq 21444entry: 21445 %0 = bitcast <8 x i64> %__a to <16 x float> 21446 %1 = bitcast <8 x i64> %__b to <16 x float> 21447 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) 21448 %3 = bitcast <16 x i1> %2 to i16 21449 %4 = zext i16 %3 to i64 21450 ret i64 %4 21451} 21452 21453define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21454; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: 21455; VLX: # %bb.0: # %entry 21456; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21457; VLX-NEXT: kmovd %k0, %eax 21458; VLX-NEXT: andl %edi, %eax 21459; VLX-NEXT: vzeroupper 21460; VLX-NEXT: retq 21461; 21462; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: 21463; NoVLX: # %bb.0: # %entry 21464; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21465; NoVLX-NEXT: kmovw %k0, 
%eax 21466; NoVLX-NEXT: andl %edi, %eax 21467; NoVLX-NEXT: vzeroupper 21468; NoVLX-NEXT: retq 21469entry: 21470 %0 = bitcast <8 x i64> %__a to <16 x float> 21471 %1 = bitcast <8 x i64> %__b to <16 x float> 21472 %2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i32 8) 21473 %3 = bitcast i16 %__u to <16 x i1> 21474 %4 = and <16 x i1> %2, %3 21475 %5 = bitcast <16 x i1> %4 to i16 21476 %6 = zext i16 %5 to i64 21477 ret i64 %6 21478} 21479 21480 21481 21482declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32) 21483define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21484; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: 21485; VLX: # %bb.0: # %entry 21486; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21487; VLX-NEXT: kmovb %k0, %eax 21488; VLX-NEXT: retq 21489; 21490; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: 21491; NoVLX: # %bb.0: # %entry 21492; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21493; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21494; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21495; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21496; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21497; NoVLX-NEXT: kmovw %k0, %eax 21498; NoVLX-NEXT: andl $3, %eax 21499; NoVLX-NEXT: vzeroupper 21500; NoVLX-NEXT: retq 21501entry: 21502 %0 = bitcast <2 x i64> %__a to <2 x double> 21503 %1 = bitcast <2 x i64> %__b to <2 x double> 21504 %2 = fcmp oeq <2 x double> %0, %1 21505 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21506 %4 = bitcast <4 x i1> %3 to i4 21507 ret i4 %4 21508} 21509 21510define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 21511; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: 21512; VLX: # %bb.0: # %entry 21513; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21514; VLX-NEXT: kmovb %k0, %eax 21515; VLX-NEXT: retq 21516; 21517; NoVLX-LABEL: 
test_vcmpoeqpd_v2i1_v4i1_mask_mem: 21518; NoVLX: # %bb.0: # %entry 21519; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21520; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21521; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21522; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21523; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21524; NoVLX-NEXT: kmovw %k0, %eax 21525; NoVLX-NEXT: andl $3, %eax 21526; NoVLX-NEXT: vzeroupper 21527; NoVLX-NEXT: retq 21528entry: 21529 %0 = bitcast <2 x i64> %__a to <2 x double> 21530 %load = load <2 x i64>, <2 x i64>* %__b 21531 %1 = bitcast <2 x i64> %load to <2 x double> 21532 %2 = fcmp oeq <2 x double> %0, %1 21533 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21534 %4 = bitcast <4 x i1> %3 to i4 21535 ret i4 %4 21536} 21537 21538define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { 21539; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21540; VLX: # %bb.0: # %entry 21541; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21542; VLX-NEXT: kmovb %k0, %eax 21543; VLX-NEXT: retq 21544; 21545; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21546; NoVLX: # %bb.0: # %entry 21547; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21548; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 21549; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21550; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21551; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21552; NoVLX-NEXT: kmovw %k0, %eax 21553; NoVLX-NEXT: andl $3, %eax 21554; NoVLX-NEXT: vzeroupper 21555; NoVLX-NEXT: retq 21556entry: 21557 %0 = bitcast <2 x i64> %__a to <2 x double> 21558 %load = load double, double* %__b 21559 %vec = insertelement <2 x double> undef, double %load, i32 0 21560 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21561 %2 = fcmp oeq <2 x double> %0, %1 21562 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21563 %4 = bitcast <4 x i1> %3 to i4 21564 ret 
i4 %4
}

; Masked ordered-equal compare of two <2 x double> register operands. The
; <2 x i1> result is ANDed with the bits of %__u, widened to <4 x i1> (shuffle
; indices 2 and 3 pick lanes of the zeroinitializer operand) and returned as i4.
; The NoVLX checks expect a zmm compare, kshiftlw/kshiftrw $14 to keep only the
; low two mask bits, and a final andl $3.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}

; Masked ordered-equal compare where the second operand is loaded from %__b as
; a full <2 x i64> vector. The result is ANDed with the bits of %__u, widened to
; <4 x i1> with lanes from zeroinitializer, and returned as i4. The VLX checks
; expect the load folded into vcmpeqpd; the NoVLX checks expect a separate
; vmovapd before the zmm compare.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}

; Masked ordered-equal compare against a scalar double loaded from %__b and
; splatted to both lanes. The result is ANDed with the bits of %__u, widened to
; <4 x i1> with lanes from zeroinitializer, and returned as i4. The VLX checks
; expect a {1to2} embedded broadcast; the NoVLX checks expect a vmovddup before
; the zmm compare.
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = bitcast <4 x i1> %5 to i4
  ret i4 %6
}


; Ordered-equal compare of two <2 x double> register operands. The <2 x i1>
; result is widened to <8 x i1> (shuffle indices 2 and 3 pick lanes of the
; zeroinitializer operand) and returned as i8. The NoVLX checks expect a zmm
; compare plus kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Ordered-equal compare where the second operand is loaded from %__b as a full
; <2 x i64> vector. The <2 x i1> result is widened to <8 x i1> with lanes from
; zeroinitializer and returned as i8. The VLX checks expect the load folded
; into vcmpeqpd; the NoVLX checks expect a separate vmovapd.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Ordered-equal compare against a scalar double loaded from %__b and splatted
; to both lanes. The <2 x i1> result is widened to <8 x i1> with lanes from
; zeroinitializer and returned as i8. The VLX checks expect a {1to2} embedded
; broadcast; the NoVLX checks expect a vmovddup.
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked ordered-equal compare of two <2 x double> register operands. The
; <2 x i1> result is ANDed with the bits of %__u, widened to <8 x i1> (shuffle
; indices 2 and 3 pick lanes of the zeroinitializer operand) and returned as i8.
; Both check prefixes expect the mask applied through {%k1} on the compare.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked ordered-equal compare where the second operand is loaded from %__b as
; a full <2 x i64> vector. The result is ANDed with the bits of %__u, widened to
; <8 x i1> with lanes from zeroinitializer, and returned as i8. The VLX checks
; expect the load folded into vcmpeqpd; the NoVLX checks expect a separate
; vmovapd.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked ordered-equal compare against a scalar double loaded from %__b and
; splatted to both lanes. The result is ANDed with the bits of %__u, widened to
; <8 x i1> with lanes from zeroinitializer, and returned as i8. The VLX checks
; expect a {1to2} embedded broadcast; the NoVLX checks expect a vmovddup.
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = bitcast i2 %__u to <2 x i1>
  %4 = and <2 x i1> %2, %3
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}



; Ordered-equal compare of two <2 x double> register operands. The <2 x i1>
; result is widened to <16 x i1> (shuffle indices 2 and 3 pick lanes of the
; zeroinitializer operand) and returned as i16. The NoVLX checks expect a zmm
; compare plus kshiftlw/kshiftrw $14, which keeps only the low two mask bits.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %1 = bitcast <2 x i64> %__b to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Ordered-equal compare where the second operand is loaded from %__b as a full
; <2 x i64> vector. The <2 x i1> result is widened to <16 x i1> with lanes from
; zeroinitializer and returned as i16. The VLX checks expect the load folded
; into vcmpeqpd; the NoVLX checks expect a separate vmovapd.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x double>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Ordered-equal compare against a scalar double loaded from %__b and splatted
; to both lanes. The <2 x i1> result is widened to <16 x i1> with lanes from
; zeroinitializer and returned as i16. The VLX checks expect a {1to2} embedded
; broadcast; the NoVLX checks expect a vmovddup.
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x double>
  %load = load double, double* %__b
  %vec = insertelement <2 x double> undef, double %load, i32 0
  %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %2 = fcmp oeq <2 x double> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

21940define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21941; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: 21942; VLX: # %bb.0: # %entry 21943; VLX-NEXT: kmovd %edi, %k1 21944; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21945; VLX-NEXT: kmovd %k0, %eax 21946; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21947; VLX-NEXT: retq 21948; 21949; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: 21950; NoVLX: # %bb.0: # %entry 21951; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21952; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21953; NoVLX-NEXT: kmovw %edi, %k1 21954; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21955; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21956; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21957; NoVLX-NEXT: kmovw %k0, %eax 21958; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21959; NoVLX-NEXT: vzeroupper 21960; NoVLX-NEXT: retq 21961entry: 21962 %0 = bitcast <2 x i64> %__a to <2 x double> 21963 %1 = bitcast <2 x i64> %__b to <2 x double> 21964 %2 = fcmp oeq <2 x double> %0, %1 21965 %3 = bitcast i2 %__u to <2 x i1> 21966 %4 = and <2 x i1> %2, %3 21967 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21968 %6 = bitcast <16 x i1> %5 to i16 21969 ret i16 %6 21970} 21971 21972define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 21973; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: 21974; VLX: # %bb.0: # %entry 21975; VLX-NEXT: kmovd %edi, %k1 21976; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21977; VLX-NEXT: kmovd %k0, %eax 21978; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21979; VLX-NEXT: retq 21980; 21981; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: 21982; NoVLX: # %bb.0: # %entry 21983; NoVLX-NEXT: # kill: def $xmm0 killed 
$xmm0 def $zmm0 21984; NoVLX-NEXT: kmovw %edi, %k1 21985; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21986; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21987; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21988; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21989; NoVLX-NEXT: kmovw %k0, %eax 21990; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21991; NoVLX-NEXT: vzeroupper 21992; NoVLX-NEXT: retq 21993entry: 21994 %0 = bitcast <2 x i64> %__a to <2 x double> 21995 %load = load <2 x i64>, <2 x i64>* %__b 21996 %1 = bitcast <2 x i64> %load to <2 x double> 21997 %2 = fcmp oeq <2 x double> %0, %1 21998 %3 = bitcast i2 %__u to <2 x i1> 21999 %4 = and <2 x i1> %2, %3 22000 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22001 %6 = bitcast <16 x i1> %5 to i16 22002 ret i16 %6 22003} 22004 22005define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr { 22006; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 22007; VLX: # %bb.0: # %entry 22008; VLX-NEXT: kmovd %edi, %k1 22009; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 22010; VLX-NEXT: kmovd %k0, %eax 22011; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22012; VLX-NEXT: retq 22013; 22014; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 22015; NoVLX: # %bb.0: # %entry 22016; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22017; NoVLX-NEXT: kmovw %edi, %k1 22018; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 22019; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22020; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22021; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22022; NoVLX-NEXT: kmovw %k0, %eax 22023; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22024; NoVLX-NEXT: vzeroupper 22025; NoVLX-NEXT: retq 22026entry: 22027 %0 = bitcast <2 x i64> %__a to <2 x double> 22028 %load = load double, double* %__b 22029 %vec = insertelement <2 
x double> undef, double %load, i32 0 22030 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 22031 %2 = fcmp oeq <2 x double> %0, %1 22032 %3 = bitcast i2 %__u to <2 x i1> 22033 %4 = and <2 x i1> %2, %3 22034 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22035 %6 = bitcast <16 x i1> %5 to i16 22036 ret i16 %6 22037} 22038 22039 22040 22041define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 22042; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: 22043; VLX: # %bb.0: # %entry 22044; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 22045; VLX-NEXT: kmovd %k0, %eax 22046; VLX-NEXT: retq 22047; 22048; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: 22049; NoVLX: # %bb.0: # %entry 22050; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 22051; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22052; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22053; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22054; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22055; NoVLX-NEXT: kmovw %k0, %eax 22056; NoVLX-NEXT: vzeroupper 22057; NoVLX-NEXT: retq 22058entry: 22059 %0 = bitcast <2 x i64> %__a to <2 x double> 22060 %1 = bitcast <2 x i64> %__b to <2 x double> 22061 %2 = fcmp oeq <2 x double> %0, %1 22062 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22063 %4 = bitcast <32 x i1> %3 to i32 22064 ret i32 %4 22065} 22066 22067define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 22068; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: 22069; VLX: # %bb.0: # %entry 22070; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 22071; 
VLX-NEXT: kmovd %k0, %eax 22072; VLX-NEXT: retq 22073; 22074; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: 22075; NoVLX: # %bb.0: # %entry 22076; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22077; NoVLX-NEXT: vmovapd (%rdi), %xmm1 22078; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22079; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22080; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22081; NoVLX-NEXT: kmovw %k0, %eax 22082; NoVLX-NEXT: vzeroupper 22083; NoVLX-NEXT: retq 22084entry: 22085 %0 = bitcast <2 x i64> %__a to <2 x double> 22086 %load = load <2 x i64>, <2 x i64>* %__b 22087 %1 = bitcast <2 x i64> %load to <2 x double> 22088 %2 = fcmp oeq <2 x double> %0, %1 22089 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22090 %4 = bitcast <32 x i1> %3 to i32 22091 ret i32 %4 22092} 22093 22094define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { 22095; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 22096; VLX: # %bb.0: # %entry 22097; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 22098; VLX-NEXT: kmovd %k0, %eax 22099; VLX-NEXT: retq 22100; 22101; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 22102; NoVLX: # %bb.0: # %entry 22103; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22104; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 22105; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22106; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22107; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22108; NoVLX-NEXT: kmovw %k0, %eax 22109; NoVLX-NEXT: vzeroupper 22110; NoVLX-NEXT: retq 22111entry: 22112 %0 = bitcast <2 x i64> %__a to <2 x double> 22113 %load = load double, double* %__b 22114 %vec = insertelement <2 x double> undef, double %load, i32 0 22115 %1 = shufflevector <2 x double> %vec, <2 x double> undef, 
<2 x i32> <i32 0, i32 0> 22116 %2 = fcmp oeq <2 x double> %0, %1 22117 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22118 %4 = bitcast <32 x i1> %3 to i32 22119 ret i32 %4 22120} 22121 22122define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 22123; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: 22124; VLX: # %bb.0: # %entry 22125; VLX-NEXT: kmovd %edi, %k1 22126; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 22127; VLX-NEXT: kmovd %k0, %eax 22128; VLX-NEXT: retq 22129; 22130; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: 22131; NoVLX: # %bb.0: # %entry 22132; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 22133; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22134; NoVLX-NEXT: kmovw %edi, %k1 22135; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22136; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22137; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22138; NoVLX-NEXT: kmovw %k0, %eax 22139; NoVLX-NEXT: vzeroupper 22140; NoVLX-NEXT: retq 22141entry: 22142 %0 = bitcast <2 x i64> %__a to <2 x double> 22143 %1 = bitcast <2 x i64> %__b to <2 x double> 22144 %2 = fcmp oeq <2 x double> %0, %1 22145 %3 = bitcast i2 %__u to <2 x i1> 22146 %4 = and <2 x i1> %2, %3 22147 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22148 %6 = bitcast <32 x i1> %5 to i32 22149 ret i32 %6 22150} 22151 22152define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 
22153; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: 22154; VLX: # %bb.0: # %entry 22155; VLX-NEXT: kmovd %edi, %k1 22156; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 22157; VLX-NEXT: kmovd %k0, %eax 22158; VLX-NEXT: retq 22159; 22160; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: 22161; NoVLX: # %bb.0: # %entry 22162; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22163; NoVLX-NEXT: kmovw %edi, %k1 22164; NoVLX-NEXT: vmovapd (%rsi), %xmm1 22165; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22166; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22167; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22168; NoVLX-NEXT: kmovw %k0, %eax 22169; NoVLX-NEXT: vzeroupper 22170; NoVLX-NEXT: retq 22171entry: 22172 %0 = bitcast <2 x i64> %__a to <2 x double> 22173 %load = load <2 x i64>, <2 x i64>* %__b 22174 %1 = bitcast <2 x i64> %load to <2 x double> 22175 %2 = fcmp oeq <2 x double> %0, %1 22176 %3 = bitcast i2 %__u to <2 x i1> 22177 %4 = and <2 x i1> %2, %3 22178 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22179 %6 = bitcast <32 x i1> %5 to i32 22180 ret i32 %6 22181} 22182 22183define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr { 22184; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 22185; VLX: # %bb.0: # %entry 22186; VLX-NEXT: kmovd %edi, %k1 22187; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 22188; VLX-NEXT: kmovd %k0, %eax 22189; VLX-NEXT: retq 22190; 22191; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 22192; NoVLX: # %bb.0: # %entry 22193; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22194; NoVLX-NEXT: kmovw %edi, %k1 22195; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 22196; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, 
%k0 {%k1} 22197; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22198; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22199; NoVLX-NEXT: kmovw %k0, %eax 22200; NoVLX-NEXT: vzeroupper 22201; NoVLX-NEXT: retq 22202entry: 22203 %0 = bitcast <2 x i64> %__a to <2 x double> 22204 %load = load double, double* %__b 22205 %vec = insertelement <2 x double> undef, double %load, i32 0 22206 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 22207 %2 = fcmp oeq <2 x double> %0, %1 22208 %3 = bitcast i2 %__u to <2 x i1> 22209 %4 = and <2 x i1> %2, %3 22210 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22211 %6 = bitcast <32 x i1> %5 to i32 22212 ret i32 %6 22213} 22214 22215 22216 22217define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 22218; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: 22219; VLX: # %bb.0: # %entry 22220; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 22221; VLX-NEXT: kmovq %k0, %rax 22222; VLX-NEXT: retq 22223; 22224; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: 22225; NoVLX: # %bb.0: # %entry 22226; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 22227; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22228; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22229; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22230; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22231; NoVLX-NEXT: kmovw %k0, %eax 22232; NoVLX-NEXT: movzwl %ax, %eax 22233; NoVLX-NEXT: vzeroupper 22234; NoVLX-NEXT: retq 22235entry: 22236 %0 = bitcast <2 x i64> %__a to <2 x double> 22237 %1 = bitcast <2 x i64> %__b to <2 x double> 22238 %2 = fcmp oeq <2 x double> %0, %1 22239 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22240 %4 = bitcast <64 x i1> %3 to i64 22241 ret i64 %4 22242} 22243 22244define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 22245; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: 22246; VLX: # %bb.0: # %entry 22247; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 22248; VLX-NEXT: kmovq %k0, %rax 22249; VLX-NEXT: retq 22250; 22251; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: 22252; NoVLX: # %bb.0: # %entry 22253; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22254; NoVLX-NEXT: vmovapd (%rdi), %xmm1 22255; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22256; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22257; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22258; NoVLX-NEXT: kmovw %k0, %eax 22259; NoVLX-NEXT: movzwl %ax, %eax 22260; NoVLX-NEXT: vzeroupper 22261; NoVLX-NEXT: retq 22262entry: 22263 %0 = bitcast <2 x i64> %__a to <2 x double> 22264 %load = load <2 x i64>, <2 x i64>* %__b 22265 %1 = bitcast <2 x i64> %load to <2 x double> 22266 %2 = fcmp oeq <2 x double> %0, %1 22267 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22268 %4 = bitcast <64 x i1> %3 to i64 22269 ret i64 %4 22270} 22271 22272define zeroext i64 
@test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { 22273; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 22274; VLX: # %bb.0: # %entry 22275; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 22276; VLX-NEXT: kmovq %k0, %rax 22277; VLX-NEXT: retq 22278; 22279; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 22280; NoVLX: # %bb.0: # %entry 22281; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22282; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 22283; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22284; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22285; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22286; NoVLX-NEXT: kmovw %k0, %eax 22287; NoVLX-NEXT: movzwl %ax, %eax 22288; NoVLX-NEXT: vzeroupper 22289; NoVLX-NEXT: retq 22290entry: 22291 %0 = bitcast <2 x i64> %__a to <2 x double> 22292 %load = load double, double* %__b 22293 %vec = insertelement <2 x double> undef, double %load, i32 0 22294 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 22295 %2 = fcmp oeq <2 x double> %0, %1 22296 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22297 %4 = bitcast <64 x i1> %3 to i64 22298 ret i64 %4 22299} 22300 22301define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 22302; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: 22303; VLX: # %bb.0: # %entry 22304; VLX-NEXT: kmovd %edi, %k1 22305; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 22306; VLX-NEXT: kmovq %k0, %rax 22307; VLX-NEXT: retq 
22308; 22309; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: 22310; NoVLX: # %bb.0: # %entry 22311; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 22312; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22313; NoVLX-NEXT: kmovw %edi, %k1 22314; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22315; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22316; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22317; NoVLX-NEXT: kmovw %k0, %eax 22318; NoVLX-NEXT: movzwl %ax, %eax 22319; NoVLX-NEXT: vzeroupper 22320; NoVLX-NEXT: retq 22321entry: 22322 %0 = bitcast <2 x i64> %__a to <2 x double> 22323 %1 = bitcast <2 x i64> %__b to <2 x double> 22324 %2 = fcmp oeq <2 x double> %0, %1 22325 %3 = bitcast i2 %__u to <2 x i1> 22326 %4 = and <2 x i1> %2, %3 22327 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22328 %6 = bitcast <64 x i1> %5 to i64 22329 ret i64 %6 22330} 22331 22332define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { 22333; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: 22334; VLX: # %bb.0: # %entry 22335; VLX-NEXT: kmovd %edi, %k1 22336; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 22337; VLX-NEXT: kmovq %k0, %rax 22338; VLX-NEXT: retq 22339; 22340; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: 22341; NoVLX: # %bb.0: # %entry 22342; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22343; NoVLX-NEXT: kmovw %edi, %k1 22344; NoVLX-NEXT: vmovapd (%rsi), %xmm1 22345; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22346; 
NoVLX-NEXT: kshiftlw $14, %k0, %k0 22347; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22348; NoVLX-NEXT: kmovw %k0, %eax 22349; NoVLX-NEXT: movzwl %ax, %eax 22350; NoVLX-NEXT: vzeroupper 22351; NoVLX-NEXT: retq 22352entry: 22353 %0 = bitcast <2 x i64> %__a to <2 x double> 22354 %load = load <2 x i64>, <2 x i64>* %__b 22355 %1 = bitcast <2 x i64> %load to <2 x double> 22356 %2 = fcmp oeq <2 x double> %0, %1 22357 %3 = bitcast i2 %__u to <2 x i1> 22358 %4 = and <2 x i1> %2, %3 22359 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22360 %6 = bitcast <64 x i1> %5 to i64 22361 ret i64 %6 22362} 22363 22364define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr { 22365; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 22366; VLX: # %bb.0: # %entry 22367; VLX-NEXT: kmovd %edi, %k1 22368; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 22369; VLX-NEXT: kmovq %k0, %rax 22370; VLX-NEXT: retq 22371; 22372; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 22373; NoVLX: # %bb.0: # %entry 22374; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 22375; NoVLX-NEXT: kmovw %edi, %k1 22376; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 22377; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22378; NoVLX-NEXT: kshiftlw $14, %k0, %k0 22379; NoVLX-NEXT: kshiftrw $14, %k0, %k0 22380; NoVLX-NEXT: kmovw %k0, %eax 22381; NoVLX-NEXT: movzwl %ax, %eax 22382; NoVLX-NEXT: vzeroupper 22383; NoVLX-NEXT: retq 22384entry: 22385 %0 = bitcast <2 x i64> %__a 
to <2 x double> 22386 %load = load double, double* %__b 22387 %vec = insertelement <2 x double> undef, double %load, i32 0 22388 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 22389 %2 = fcmp oeq <2 x double> %0, %1 22390 %3 = bitcast i2 %__u to <2 x i1> 22391 %4 = and <2 x i1> %2, %3 22392 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 22393 %6 = bitcast <64 x i1> %5 to i64 22394 ret i64 %6 22395} 22396 22397 22398 22399define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22400; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: 22401; VLX: # %bb.0: # %entry 22402; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22403; VLX-NEXT: kmovd %k0, %eax 22404; VLX-NEXT: # kill: def $al killed $al killed $eax 22405; VLX-NEXT: vzeroupper 22406; VLX-NEXT: retq 22407; 22408; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: 22409; NoVLX: # %bb.0: # %entry 22410; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22411; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22412; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22413; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22414; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22415; NoVLX-NEXT: kmovw %k0, %eax 22416; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22417; NoVLX-NEXT: vzeroupper 22418; NoVLX-NEXT: retq 22419entry: 22420 %0 = bitcast <4 x i64> %__a to <4 x double> 22421 %1 = bitcast <4 x i64> %__b to <4 x double> 22422 %2 = fcmp oeq <4 x double> %0, %1 22423 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, 
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22424 %4 = bitcast <8 x i1> %3 to i8 22425 ret i8 %4 22426} 22427 22428define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22429; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: 22430; VLX: # %bb.0: # %entry 22431; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 22432; VLX-NEXT: kmovd %k0, %eax 22433; VLX-NEXT: # kill: def $al killed $al killed $eax 22434; VLX-NEXT: vzeroupper 22435; VLX-NEXT: retq 22436; 22437; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: 22438; NoVLX: # %bb.0: # %entry 22439; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22440; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22441; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22442; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22443; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22444; NoVLX-NEXT: kmovw %k0, %eax 22445; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22446; NoVLX-NEXT: vzeroupper 22447; NoVLX-NEXT: retq 22448entry: 22449 %0 = bitcast <4 x i64> %__a to <4 x double> 22450 %load = load <4 x i64>, <4 x i64>* %__b 22451 %1 = bitcast <4 x i64> %load to <4 x double> 22452 %2 = fcmp oeq <4 x double> %0, %1 22453 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22454 %4 = bitcast <8 x i1> %3 to i8 22455 ret i8 %4 22456} 22457 22458define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { 22459; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22460; VLX: # %bb.0: # %entry 22461; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 22462; VLX-NEXT: kmovd %k0, %eax 22463; VLX-NEXT: # kill: def $al killed $al killed $eax 22464; VLX-NEXT: vzeroupper 22465; VLX-NEXT: retq 22466; 22467; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22468; NoVLX: # %bb.0: # %entry 22469; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22470; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 22471; NoVLX-NEXT: vcmpeqpd 
%zmm1, %zmm0, %k0 22472; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22473; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22474; NoVLX-NEXT: kmovw %k0, %eax 22475; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22476; NoVLX-NEXT: vzeroupper 22477; NoVLX-NEXT: retq 22478entry: 22479 %0 = bitcast <4 x i64> %__a to <4 x double> 22480 %load = load double, double* %__b 22481 %vec = insertelement <4 x double> undef, double %load, i32 0 22482 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22483 %2 = fcmp oeq <4 x double> %0, %1 22484 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22485 %4 = bitcast <8 x i1> %3 to i8 22486 ret i8 %4 22487} 22488 22489define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22490; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: 22491; VLX: # %bb.0: # %entry 22492; VLX-NEXT: kmovd %edi, %k1 22493; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22494; VLX-NEXT: kmovd %k0, %eax 22495; VLX-NEXT: # kill: def $al killed $al killed $eax 22496; VLX-NEXT: vzeroupper 22497; VLX-NEXT: retq 22498; 22499; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: 22500; NoVLX: # %bb.0: # %entry 22501; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22502; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22503; NoVLX-NEXT: kmovw %edi, %k1 22504; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22505; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22506; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22507; NoVLX-NEXT: kmovw %k0, %eax 22508; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22509; NoVLX-NEXT: vzeroupper 22510; NoVLX-NEXT: retq 22511entry: 22512 %0 = bitcast <4 x i64> %__a to <4 x double> 22513 %1 = bitcast <4 x i64> %__b to <4 x double> 22514 %2 = fcmp oeq <4 x double> %0, %1 22515 %3 = bitcast i4 %__u to <4 x i1> 22516 %4 = and <4 x i1> %2, %3 22517 %5 = shufflevector <4 x i1> %4, <4 x 
i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22518 %6 = bitcast <8 x i1> %5 to i8 22519 ret i8 %6 22520} 22521 22522define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22523; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: 22524; VLX: # %bb.0: # %entry 22525; VLX-NEXT: kmovd %edi, %k1 22526; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22527; VLX-NEXT: kmovd %k0, %eax 22528; VLX-NEXT: # kill: def $al killed $al killed $eax 22529; VLX-NEXT: vzeroupper 22530; VLX-NEXT: retq 22531; 22532; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: 22533; NoVLX: # %bb.0: # %entry 22534; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22535; NoVLX-NEXT: kmovw %edi, %k1 22536; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22537; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22538; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22539; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22540; NoVLX-NEXT: kmovw %k0, %eax 22541; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22542; NoVLX-NEXT: vzeroupper 22543; NoVLX-NEXT: retq 22544entry: 22545 %0 = bitcast <4 x i64> %__a to <4 x double> 22546 %load = load <4 x i64>, <4 x i64>* %__b 22547 %1 = bitcast <4 x i64> %load to <4 x double> 22548 %2 = fcmp oeq <4 x double> %0, %1 22549 %3 = bitcast i4 %__u to <4 x i1> 22550 %4 = and <4 x i1> %2, %3 22551 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22552 %6 = bitcast <8 x i1> %5 to i8 22553 ret i8 %6 22554} 22555 22556define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { 22557; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22558; VLX: # %bb.0: # %entry 22559; VLX-NEXT: kmovd %edi, %k1 22560; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22561; VLX-NEXT: kmovd %k0, %eax 22562; VLX-NEXT: # kill: def $al killed $al killed $eax 
22563; VLX-NEXT: vzeroupper 22564; VLX-NEXT: retq 22565; 22566; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22567; NoVLX: # %bb.0: # %entry 22568; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22569; NoVLX-NEXT: kmovw %edi, %k1 22570; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 22571; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22572; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22573; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22574; NoVLX-NEXT: kmovw %k0, %eax 22575; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22576; NoVLX-NEXT: vzeroupper 22577; NoVLX-NEXT: retq 22578entry: 22579 %0 = bitcast <4 x i64> %__a to <4 x double> 22580 %load = load double, double* %__b 22581 %vec = insertelement <4 x double> undef, double %load, i32 0 22582 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22583 %2 = fcmp oeq <4 x double> %0, %1 22584 %3 = bitcast i4 %__u to <4 x i1> 22585 %4 = and <4 x i1> %2, %3 22586 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22587 %6 = bitcast <8 x i1> %5 to i8 22588 ret i8 %6 22589} 22590 22591 22592 22593define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22594; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: 22595; VLX: # %bb.0: # %entry 22596; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22597; VLX-NEXT: kmovd %k0, %eax 22598; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22599; VLX-NEXT: vzeroupper 22600; VLX-NEXT: retq 22601; 22602; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: 22603; NoVLX: # %bb.0: # %entry 22604; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22605; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22606; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22607; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22608; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22609; NoVLX-NEXT: kmovw %k0, %eax 22610; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22611; NoVLX-NEXT: 
vzeroupper 22612; NoVLX-NEXT: retq 22613entry: 22614 %0 = bitcast <4 x i64> %__a to <4 x double> 22615 %1 = bitcast <4 x i64> %__b to <4 x double> 22616 %2 = fcmp oeq <4 x double> %0, %1 22617 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22618 %4 = bitcast <16 x i1> %3 to i16 22619 ret i16 %4 22620} 22621 22622define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22623; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: 22624; VLX: # %bb.0: # %entry 22625; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 22626; VLX-NEXT: kmovd %k0, %eax 22627; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22628; VLX-NEXT: vzeroupper 22629; VLX-NEXT: retq 22630; 22631; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: 22632; NoVLX: # %bb.0: # %entry 22633; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22634; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22635; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22636; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22637; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22638; NoVLX-NEXT: kmovw %k0, %eax 22639; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22640; NoVLX-NEXT: vzeroupper 22641; NoVLX-NEXT: retq 22642entry: 22643 %0 = bitcast <4 x i64> %__a to <4 x double> 22644 %load = load <4 x i64>, <4 x i64>* %__b 22645 %1 = bitcast <4 x i64> %load to <4 x double> 22646 %2 = fcmp oeq <4 x double> %0, %1 22647 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22648 %4 = bitcast <16 x i1> %3 to i16 22649 ret i16 %4 22650} 22651 22652define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { 22653; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22654; VLX: # %bb.0: # %entry 22655; VLX-NEXT: vcmpeqpd (%rdi){1to4}, 
%ymm0, %k0 22656; VLX-NEXT: kmovd %k0, %eax 22657; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22658; VLX-NEXT: vzeroupper 22659; VLX-NEXT: retq 22660; 22661; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22662; NoVLX: # %bb.0: # %entry 22663; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22664; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 22665; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22666; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22667; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22668; NoVLX-NEXT: kmovw %k0, %eax 22669; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22670; NoVLX-NEXT: vzeroupper 22671; NoVLX-NEXT: retq 22672entry: 22673 %0 = bitcast <4 x i64> %__a to <4 x double> 22674 %load = load double, double* %__b 22675 %vec = insertelement <4 x double> undef, double %load, i32 0 22676 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22677 %2 = fcmp oeq <4 x double> %0, %1 22678 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22679 %4 = bitcast <16 x i1> %3 to i16 22680 ret i16 %4 22681} 22682 22683define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22684; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: 22685; VLX: # %bb.0: # %entry 22686; VLX-NEXT: kmovd %edi, %k1 22687; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22688; VLX-NEXT: kmovd %k0, %eax 22689; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22690; VLX-NEXT: vzeroupper 22691; VLX-NEXT: retq 22692; 22693; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: 22694; NoVLX: # %bb.0: # %entry 22695; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22696; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22697; NoVLX-NEXT: kmovw %edi, %k1 22698; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22699; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22700; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 22701; NoVLX-NEXT: kmovw %k0, %eax 22702; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22703; NoVLX-NEXT: vzeroupper 22704; NoVLX-NEXT: retq 22705entry: 22706 %0 = bitcast <4 x i64> %__a to <4 x double> 22707 %1 = bitcast <4 x i64> %__b to <4 x double> 22708 %2 = fcmp oeq <4 x double> %0, %1 22709 %3 = bitcast i4 %__u to <4 x i1> 22710 %4 = and <4 x i1> %2, %3 22711 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22712 %6 = bitcast <16 x i1> %5 to i16 22713 ret i16 %6 22714} 22715 22716define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22717; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: 22718; VLX: # %bb.0: # %entry 22719; VLX-NEXT: kmovd %edi, %k1 22720; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22721; VLX-NEXT: kmovd %k0, %eax 22722; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22723; VLX-NEXT: vzeroupper 22724; VLX-NEXT: retq 22725; 22726; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: 22727; NoVLX: # %bb.0: # %entry 22728; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22729; NoVLX-NEXT: kmovw %edi, %k1 22730; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22731; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22732; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22733; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22734; NoVLX-NEXT: kmovw %k0, %eax 22735; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22736; NoVLX-NEXT: vzeroupper 22737; NoVLX-NEXT: retq 22738entry: 22739 %0 = bitcast <4 x i64> %__a to <4 x double> 22740 %load = load <4 x i64>, <4 x i64>* %__b 22741 %1 = bitcast <4 x i64> %load to <4 x double> 22742 %2 = fcmp oeq <4 x double> %0, %1 22743 %3 = bitcast i4 %__u to <4 x i1> 22744 %4 = and <4 x i1> %2, %3 22745 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, 
i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22746 %6 = bitcast <16 x i1> %5 to i16 22747 ret i16 %6 22748} 22749 22750define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { 22751; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22752; VLX: # %bb.0: # %entry 22753; VLX-NEXT: kmovd %edi, %k1 22754; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22755; VLX-NEXT: kmovd %k0, %eax 22756; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22757; VLX-NEXT: vzeroupper 22758; VLX-NEXT: retq 22759; 22760; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22761; NoVLX: # %bb.0: # %entry 22762; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22763; NoVLX-NEXT: kmovw %edi, %k1 22764; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 22765; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22766; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22767; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22768; NoVLX-NEXT: kmovw %k0, %eax 22769; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22770; NoVLX-NEXT: vzeroupper 22771; NoVLX-NEXT: retq 22772entry: 22773 %0 = bitcast <4 x i64> %__a to <4 x double> 22774 %load = load double, double* %__b 22775 %vec = insertelement <4 x double> undef, double %load, i32 0 22776 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22777 %2 = fcmp oeq <4 x double> %0, %1 22778 %3 = bitcast i4 %__u to <4 x i1> 22779 %4 = and <4 x i1> %2, %3 22780 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22781 %6 = bitcast <16 x i1> %5 to i16 22782 ret i16 %6 22783} 22784 22785 22786 22787define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22788; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: 22789; VLX: # %bb.0: # %entry 22790; 
VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22791; VLX-NEXT: kmovd %k0, %eax 22792; VLX-NEXT: vzeroupper 22793; VLX-NEXT: retq 22794; 22795; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: 22796; NoVLX: # %bb.0: # %entry 22797; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22798; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22799; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22800; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22801; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22802; NoVLX-NEXT: kmovw %k0, %eax 22803; NoVLX-NEXT: vzeroupper 22804; NoVLX-NEXT: retq 22805entry: 22806 %0 = bitcast <4 x i64> %__a to <4 x double> 22807 %1 = bitcast <4 x i64> %__b to <4 x double> 22808 %2 = fcmp oeq <4 x double> %0, %1 22809 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22810 %4 = bitcast <32 x i1> %3 to i32 22811 ret i32 %4 22812} 22813 22814define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22815; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: 22816; VLX: # %bb.0: # %entry 22817; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 22818; VLX-NEXT: kmovd %k0, %eax 22819; VLX-NEXT: vzeroupper 22820; VLX-NEXT: retq 22821; 22822; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: 22823; NoVLX: # %bb.0: # %entry 22824; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22825; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22826; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22827; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22828; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22829; NoVLX-NEXT: kmovw %k0, %eax 22830; NoVLX-NEXT: vzeroupper 22831; NoVLX-NEXT: retq 22832entry: 22833 %0 = bitcast <4 x i64> %__a to <4 x double> 22834 %load = load <4 x i64>, <4 x i64>* %__b 22835 %1 = bitcast <4 x i64> %load to <4 x double> 22836 %2 = fcmp oeq <4 x 
double> %0, %1 22837 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22838 %4 = bitcast <32 x i1> %3 to i32 22839 ret i32 %4 22840} 22841 22842define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { 22843; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22844; VLX: # %bb.0: # %entry 22845; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 22846; VLX-NEXT: kmovd %k0, %eax 22847; VLX-NEXT: vzeroupper 22848; VLX-NEXT: retq 22849; 22850; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22851; NoVLX: # %bb.0: # %entry 22852; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22853; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 22854; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22855; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22856; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22857; NoVLX-NEXT: kmovw %k0, %eax 22858; NoVLX-NEXT: vzeroupper 22859; NoVLX-NEXT: retq 22860entry: 22861 %0 = bitcast <4 x i64> %__a to <4 x double> 22862 %load = load double, double* %__b 22863 %vec = insertelement <4 x double> undef, double %load, i32 0 22864 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22865 %2 = fcmp oeq <4 x double> %0, %1 22866 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22867 %4 = bitcast <32 x i1> %3 to i32 22868 ret i32 %4 22869} 22870 22871define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22872; VLX-LABEL: 
test_masked_vcmpoeqpd_v4i1_v32i1_mask: 22873; VLX: # %bb.0: # %entry 22874; VLX-NEXT: kmovd %edi, %k1 22875; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22876; VLX-NEXT: kmovd %k0, %eax 22877; VLX-NEXT: vzeroupper 22878; VLX-NEXT: retq 22879; 22880; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: 22881; NoVLX: # %bb.0: # %entry 22882; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22883; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22884; NoVLX-NEXT: kmovw %edi, %k1 22885; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22886; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22887; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22888; NoVLX-NEXT: kmovw %k0, %eax 22889; NoVLX-NEXT: vzeroupper 22890; NoVLX-NEXT: retq 22891entry: 22892 %0 = bitcast <4 x i64> %__a to <4 x double> 22893 %1 = bitcast <4 x i64> %__b to <4 x double> 22894 %2 = fcmp oeq <4 x double> %0, %1 22895 %3 = bitcast i4 %__u to <4 x i1> 22896 %4 = and <4 x i1> %2, %3 22897 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22898 %6 = bitcast <32 x i1> %5 to i32 22899 ret i32 %6 22900} 22901 22902define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22903; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: 22904; VLX: # %bb.0: # %entry 22905; VLX-NEXT: kmovd %edi, %k1 22906; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22907; VLX-NEXT: kmovd %k0, %eax 22908; VLX-NEXT: vzeroupper 22909; VLX-NEXT: retq 22910; 22911; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: 22912; NoVLX: # %bb.0: # %entry 22913; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22914; NoVLX-NEXT: kmovw %edi, %k1 22915; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22916; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22917; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 22918; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22919; NoVLX-NEXT: kmovw %k0, %eax 22920; NoVLX-NEXT: vzeroupper 22921; NoVLX-NEXT: retq 22922entry: 22923 %0 = bitcast <4 x i64> %__a to <4 x double> 22924 %load = load <4 x i64>, <4 x i64>* %__b 22925 %1 = bitcast <4 x i64> %load to <4 x double> 22926 %2 = fcmp oeq <4 x double> %0, %1 22927 %3 = bitcast i4 %__u to <4 x i1> 22928 %4 = and <4 x i1> %2, %3 22929 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22930 %6 = bitcast <32 x i1> %5 to i32 22931 ret i32 %6 22932} 22933 22934define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { 22935; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22936; VLX: # %bb.0: # %entry 22937; VLX-NEXT: kmovd %edi, %k1 22938; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22939; VLX-NEXT: kmovd %k0, %eax 22940; VLX-NEXT: vzeroupper 22941; VLX-NEXT: retq 22942; 22943; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22944; NoVLX: # %bb.0: # %entry 22945; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22946; NoVLX-NEXT: kmovw %edi, %k1 22947; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 22948; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22949; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22950; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22951; NoVLX-NEXT: kmovw %k0, %eax 22952; NoVLX-NEXT: vzeroupper 22953; NoVLX-NEXT: retq 22954entry: 22955 %0 = bitcast <4 x i64> %__a to <4 x double> 22956 %load = load double, double* %__b 22957 %vec = insertelement <4 x double> undef, double %load, i32 0 22958 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22959 %2 = fcmp oeq <4 x double> %0, %1 22960 %3 = 
bitcast i4 %__u to <4 x i1> 22961 %4 = and <4 x i1> %2, %3 22962 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22963 %6 = bitcast <32 x i1> %5 to i32 22964 ret i32 %6 22965} 22966 22967 22968 22969define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22970; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: 22971; VLX: # %bb.0: # %entry 22972; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22973; VLX-NEXT: kmovq %k0, %rax 22974; VLX-NEXT: vzeroupper 22975; VLX-NEXT: retq 22976; 22977; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: 22978; NoVLX: # %bb.0: # %entry 22979; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22980; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22981; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22982; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22983; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22984; NoVLX-NEXT: kmovw %k0, %eax 22985; NoVLX-NEXT: movzwl %ax, %eax 22986; NoVLX-NEXT: vzeroupper 22987; NoVLX-NEXT: retq 22988entry: 22989 %0 = bitcast <4 x i64> %__a to <4 x double> 22990 %1 = bitcast <4 x i64> %__b to <4 x double> 22991 %2 = fcmp oeq <4 x double> %0, %1 22992 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22993 %4 = bitcast <64 x i1> %3 to i64 22994 ret i64 %4 22995} 22996 22997define zeroext i64 
@test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 22998; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: 22999; VLX: # %bb.0: # %entry 23000; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 23001; VLX-NEXT: kmovq %k0, %rax 23002; VLX-NEXT: vzeroupper 23003; VLX-NEXT: retq 23004; 23005; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: 23006; NoVLX: # %bb.0: # %entry 23007; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 23008; NoVLX-NEXT: vmovapd (%rdi), %ymm1 23009; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23010; NoVLX-NEXT: kshiftlw $12, %k0, %k0 23011; NoVLX-NEXT: kshiftrw $12, %k0, %k0 23012; NoVLX-NEXT: kmovw %k0, %eax 23013; NoVLX-NEXT: movzwl %ax, %eax 23014; NoVLX-NEXT: vzeroupper 23015; NoVLX-NEXT: retq 23016entry: 23017 %0 = bitcast <4 x i64> %__a to <4 x double> 23018 %load = load <4 x i64>, <4 x i64>* %__b 23019 %1 = bitcast <4 x i64> %load to <4 x double> 23020 %2 = fcmp oeq <4 x double> %0, %1 23021 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 23022 %4 = bitcast <64 x i1> %3 to i64 23023 ret i64 %4 23024} 23025 23026define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { 23027; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 23028; VLX: # %bb.0: # %entry 23029; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 23030; VLX-NEXT: kmovq %k0, %rax 23031; VLX-NEXT: vzeroupper 23032; VLX-NEXT: retq 23033; 23034; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 23035; NoVLX: # %bb.0: # %entry 23036; NoVLX-NEXT: # 
kill: def $ymm0 killed $ymm0 def $zmm0 23037; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 23038; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23039; NoVLX-NEXT: kshiftlw $12, %k0, %k0 23040; NoVLX-NEXT: kshiftrw $12, %k0, %k0 23041; NoVLX-NEXT: kmovw %k0, %eax 23042; NoVLX-NEXT: movzwl %ax, %eax 23043; NoVLX-NEXT: vzeroupper 23044; NoVLX-NEXT: retq 23045entry: 23046 %0 = bitcast <4 x i64> %__a to <4 x double> 23047 %load = load double, double* %__b 23048 %vec = insertelement <4 x double> undef, double %load, i32 0 23049 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 23050 %2 = fcmp oeq <4 x double> %0, %1 23051 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 23052 %4 = bitcast <64 x i1> %3 to i64 23053 ret i64 %4 23054} 23055 23056define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 23057; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: 23058; VLX: # %bb.0: # %entry 23059; VLX-NEXT: kmovd %edi, %k1 23060; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 23061; VLX-NEXT: kmovq %k0, %rax 23062; VLX-NEXT: vzeroupper 23063; VLX-NEXT: retq 23064; 23065; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: 23066; NoVLX: # %bb.0: # %entry 23067; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 23068; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 23069; NoVLX-NEXT: kmovw %edi, %k1 23070; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23071; NoVLX-NEXT: kshiftlw $12, %k0, %k0 23072; NoVLX-NEXT: 
kshiftrw $12, %k0, %k0 23073; NoVLX-NEXT: kmovw %k0, %eax 23074; NoVLX-NEXT: movzwl %ax, %eax 23075; NoVLX-NEXT: vzeroupper 23076; NoVLX-NEXT: retq 23077entry: 23078 %0 = bitcast <4 x i64> %__a to <4 x double> 23079 %1 = bitcast <4 x i64> %__b to <4 x double> 23080 %2 = fcmp oeq <4 x double> %0, %1 23081 %3 = bitcast i4 %__u to <4 x i1> 23082 %4 = and <4 x i1> %2, %3 23083 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 23084 %6 = bitcast <64 x i1> %5 to i64 23085 ret i64 %6 23086} 23087 23088define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { 23089; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: 23090; VLX: # %bb.0: # %entry 23091; VLX-NEXT: kmovd %edi, %k1 23092; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 23093; VLX-NEXT: kmovq %k0, %rax 23094; VLX-NEXT: vzeroupper 23095; VLX-NEXT: retq 23096; 23097; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: 23098; NoVLX: # %bb.0: # %entry 23099; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 23100; NoVLX-NEXT: kmovw %edi, %k1 23101; NoVLX-NEXT: vmovapd (%rsi), %ymm1 23102; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23103; NoVLX-NEXT: kshiftlw $12, %k0, %k0 23104; NoVLX-NEXT: kshiftrw $12, %k0, %k0 23105; NoVLX-NEXT: kmovw %k0, %eax 23106; NoVLX-NEXT: movzwl %ax, %eax 23107; NoVLX-NEXT: vzeroupper 23108; NoVLX-NEXT: retq 23109entry: 23110 %0 = bitcast <4 x i64> %__a to <4 x double> 23111 %load = load <4 x i64>, <4 x i64>* %__b 23112 %1 = bitcast <4 x i64> 
%load to <4 x double> 23113 %2 = fcmp oeq <4 x double> %0, %1 23114 %3 = bitcast i4 %__u to <4 x i1> 23115 %4 = and <4 x i1> %2, %3 23116 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 23117 %6 = bitcast <64 x i1> %5 to i64 23118 ret i64 %6 23119} 23120 23121define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr { 23122; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 23123; VLX: # %bb.0: # %entry 23124; VLX-NEXT: kmovd %edi, %k1 23125; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 23126; VLX-NEXT: kmovq %k0, %rax 23127; VLX-NEXT: vzeroupper 23128; VLX-NEXT: retq 23129; 23130; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 23131; NoVLX: # %bb.0: # %entry 23132; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 23133; NoVLX-NEXT: kmovw %edi, %k1 23134; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 23135; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23136; NoVLX-NEXT: kshiftlw $12, %k0, %k0 23137; NoVLX-NEXT: kshiftrw $12, %k0, %k0 23138; NoVLX-NEXT: kmovw %k0, %eax 23139; NoVLX-NEXT: movzwl %ax, %eax 23140; NoVLX-NEXT: vzeroupper 23141; NoVLX-NEXT: retq 23142entry: 23143 %0 = bitcast <4 x i64> %__a to <4 x double> 23144 %load = load double, double* %__b 23145 %vec = insertelement <4 x double> undef, double %load, i32 0 23146 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 23147 %2 = fcmp oeq <4 x double> %0, %1 23148 %3 = bitcast i4 %__u to <4 x i1> 23149 %4 = and 
<4 x i1> %2, %3 23150 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 23151 %6 = bitcast <64 x i1> %5 to i64 23152 ret i64 %6 23153} 23154 23155 23156 23157define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23158; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask: 23159; VLX: # %bb.0: # %entry 23160; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23161; VLX-NEXT: kmovd %k0, %eax 23162; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23163; VLX-NEXT: vzeroupper 23164; VLX-NEXT: retq 23165; 23166; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask: 23167; NoVLX: # %bb.0: # %entry 23168; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23169; NoVLX-NEXT: kmovw %k0, %eax 23170; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23171; NoVLX-NEXT: vzeroupper 23172; NoVLX-NEXT: retq 23173entry: 23174 %0 = bitcast <8 x i64> %__a to <8 x double> 23175 %1 = bitcast <8 x i64> %__b to <8 x double> 23176 %2 = fcmp oeq <8 x double> %0, %1 23177 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23178 %4 = bitcast <16 x i1> %3 to i16 23179 ret i16 %4 23180} 23181 23182define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 23183; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem: 23184; VLX: # %bb.0: # %entry 23185; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 23186; VLX-NEXT: kmovd %k0, %eax 23187; VLX-NEXT: # kill: def 
$ax killed $ax killed $eax 23188; VLX-NEXT: vzeroupper 23189; VLX-NEXT: retq 23190; 23191; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem: 23192; NoVLX: # %bb.0: # %entry 23193; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 23194; NoVLX-NEXT: kmovw %k0, %eax 23195; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23196; NoVLX-NEXT: vzeroupper 23197; NoVLX-NEXT: retq 23198entry: 23199 %0 = bitcast <8 x i64> %__a to <8 x double> 23200 %load = load <8 x i64>, <8 x i64>* %__b 23201 %1 = bitcast <8 x i64> %load to <8 x double> 23202 %2 = fcmp oeq <8 x double> %0, %1 23203 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23204 %4 = bitcast <16 x i1> %3 to i16 23205 ret i16 %4 23206} 23207 23208define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { 23209; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b: 23210; VLX: # %bb.0: # %entry 23211; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23212; VLX-NEXT: kmovd %k0, %eax 23213; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23214; VLX-NEXT: vzeroupper 23215; VLX-NEXT: retq 23216; 23217; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b: 23218; NoVLX: # %bb.0: # %entry 23219; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23220; NoVLX-NEXT: kmovw %k0, %eax 23221; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23222; NoVLX-NEXT: vzeroupper 23223; NoVLX-NEXT: retq 23224entry: 23225 %0 = bitcast <8 x i64> %__a to <8 x double> 23226 %load = load double, double* %__b 23227 %vec = insertelement <8 x double> undef, double %load, i32 0 23228 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 23229 %2 = fcmp oeq <8 x double> %0, %1 23230 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23231 %4 = bitcast <16 x i1> %3 to i16 23232 ret i16 %4 23233} 23234 23235define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23236; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask: 23237; VLX: # %bb.0: # %entry 23238; VLX-NEXT: kmovd %edi, %k1 23239; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23240; VLX-NEXT: kmovd %k0, %eax 23241; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23242; VLX-NEXT: vzeroupper 23243; VLX-NEXT: retq 23244; 23245; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask: 23246; NoVLX: # %bb.0: # %entry 23247; NoVLX-NEXT: kmovw %edi, %k1 23248; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23249; NoVLX-NEXT: kmovw %k0, %eax 23250; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23251; NoVLX-NEXT: vzeroupper 23252; NoVLX-NEXT: retq 23253entry: 23254 %0 = bitcast <8 x i64> %__a to <8 x double> 23255 %1 = bitcast <8 x i64> %__b to <8 x double> 23256 %2 = fcmp oeq <8 x double> %0, %1 23257 %3 = bitcast i8 %__u to <8 x i1> 23258 %4 = and <8 x i1> %2, %3 23259 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23260 %6 = bitcast <16 x i1> %5 to i16 23261 ret i16 %6 23262} 23263 23264define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 23265; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: 23266; VLX: # %bb.0: # %entry 23267; VLX-NEXT: kmovd %edi, %k1 23268; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 23269; VLX-NEXT: kmovd %k0, %eax 23270; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23271; VLX-NEXT: vzeroupper 23272; VLX-NEXT: retq 23273; 23274; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: 23275; NoVLX: # %bb.0: # %entry 23276; NoVLX-NEXT: kmovw %edi, %k1 23277; NoVLX-NEXT: vcmpeqpd 
(%rsi), %zmm0, %k0 {%k1} 23278; NoVLX-NEXT: kmovw %k0, %eax 23279; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23280; NoVLX-NEXT: vzeroupper 23281; NoVLX-NEXT: retq 23282entry: 23283 %0 = bitcast <8 x i64> %__a to <8 x double> 23284 %load = load <8 x i64>, <8 x i64>* %__b 23285 %1 = bitcast <8 x i64> %load to <8 x double> 23286 %2 = fcmp oeq <8 x double> %0, %1 23287 %3 = bitcast i8 %__u to <8 x i1> 23288 %4 = and <8 x i1> %2, %3 23289 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23290 %6 = bitcast <16 x i1> %5 to i16 23291 ret i16 %6 23292} 23293 23294define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr { 23295; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b: 23296; VLX: # %bb.0: # %entry 23297; VLX-NEXT: kmovd %edi, %k1 23298; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 23299; VLX-NEXT: kmovd %k0, %eax 23300; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23301; VLX-NEXT: vzeroupper 23302; VLX-NEXT: retq 23303; 23304; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b: 23305; NoVLX: # %bb.0: # %entry 23306; NoVLX-NEXT: kmovw %edi, %k1 23307; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 23308; NoVLX-NEXT: kmovw %k0, %eax 23309; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23310; NoVLX-NEXT: vzeroupper 23311; NoVLX-NEXT: retq 23312entry: 23313 %0 = bitcast <8 x i64> %__a to <8 x double> 23314 %load = load double, double* %__b 23315 %vec = insertelement <8 x double> undef, double %load, i32 0 23316 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 23317 %2 = fcmp oeq <8 x double> %0, %1 23318 %3 = bitcast i8 %__u to <8 x i1> 23319 %4 = and <8 x i1> %2, %3 23320 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23321 %6 = bitcast <16 x i1> %5 to i16 23322 ret i16 %6 23323} 23324 23325 23326 23327define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23328; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: 23329; VLX: # %bb.0: # %entry 23330; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23331; VLX-NEXT: kmovd %k0, %eax 23332; VLX-NEXT: movzbl %al, %eax 23333; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23334; VLX-NEXT: vzeroupper 23335; VLX-NEXT: retq 23336; 23337; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: 23338; NoVLX: # %bb.0: # %entry 23339; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23340; NoVLX-NEXT: kmovw %k0, %eax 23341; NoVLX-NEXT: movzbl %al, %eax 23342; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23343; NoVLX-NEXT: vzeroupper 23344; NoVLX-NEXT: retq 23345entry: 23346 %0 = bitcast <8 x i64> %__a to <8 x double> 23347 %1 = bitcast <8 x i64> %__b to <8 x double> 23348 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8) 23349 %3 = bitcast <8 x i1> %2 to i8 23350 %4 = zext i8 %3 to i16 23351 ret i16 %4 23352} 23353 23354define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23355; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: 23356; VLX: # %bb.0: # %entry 23357; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23358; VLX-NEXT: kmovd %k0, %eax 23359; VLX-NEXT: andb %dil, %al 23360; VLX-NEXT: movzbl %al, %eax 23361; VLX-NEXT: # kill: def $ax killed $ax killed $eax 23362; VLX-NEXT: vzeroupper 23363; VLX-NEXT: retq 23364; 23365; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: 23366; NoVLX: # %bb.0: # %entry 23367; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23368; NoVLX-NEXT: kmovw %k0, %eax 23369; NoVLX-NEXT: andb %dil, %al 23370; NoVLX-NEXT: movzbl %al, %eax 
23371; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 23372; NoVLX-NEXT: vzeroupper 23373; NoVLX-NEXT: retq 23374entry: 23375 %0 = bitcast <8 x i64> %__a to <8 x double> 23376 %1 = bitcast <8 x i64> %__b to <8 x double> 23377 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8) 23378 %3 = bitcast i8 %__u to <8 x i1> 23379 %4 = and <8 x i1> %2, %3 23380 %5 = bitcast <8 x i1> %4 to i8 23381 %6 = zext i8 %5 to i16 23382 ret i16 %6 23383} 23384 23385 23386 23387define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23388; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: 23389; VLX: # %bb.0: # %entry 23390; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23391; VLX-NEXT: kmovd %k0, %eax 23392; VLX-NEXT: vzeroupper 23393; VLX-NEXT: retq 23394; 23395; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: 23396; NoVLX: # %bb.0: # %entry 23397; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23398; NoVLX-NEXT: kmovw %k0, %eax 23399; NoVLX-NEXT: vzeroupper 23400; NoVLX-NEXT: retq 23401entry: 23402 %0 = bitcast <8 x i64> %__a to <8 x double> 23403 %1 = bitcast <8 x i64> %__b to <8 x double> 23404 %2 = fcmp oeq <8 x double> %0, %1 23405 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23406 %4 = bitcast <32 x i1> %3 to i32 23407 ret i32 %4 23408} 23409 23410define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 23411; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: 23412; VLX: # %bb.0: # %entry 23413; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 23414; VLX-NEXT: kmovd %k0, %eax 23415; VLX-NEXT: vzeroupper 23416; VLX-NEXT: retq 23417; 23418; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: 23419; NoVLX: # %bb.0: # 
%entry 23420; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 23421; NoVLX-NEXT: kmovw %k0, %eax 23422; NoVLX-NEXT: vzeroupper 23423; NoVLX-NEXT: retq 23424entry: 23425 %0 = bitcast <8 x i64> %__a to <8 x double> 23426 %load = load <8 x i64>, <8 x i64>* %__b 23427 %1 = bitcast <8 x i64> %load to <8 x double> 23428 %2 = fcmp oeq <8 x double> %0, %1 23429 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23430 %4 = bitcast <32 x i1> %3 to i32 23431 ret i32 %4 23432} 23433 23434define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { 23435; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: 23436; VLX: # %bb.0: # %entry 23437; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23438; VLX-NEXT: kmovd %k0, %eax 23439; VLX-NEXT: vzeroupper 23440; VLX-NEXT: retq 23441; 23442; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: 23443; NoVLX: # %bb.0: # %entry 23444; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23445; NoVLX-NEXT: kmovw %k0, %eax 23446; NoVLX-NEXT: vzeroupper 23447; NoVLX-NEXT: retq 23448entry: 23449 %0 = bitcast <8 x i64> %__a to <8 x double> 23450 %load = load double, double* %__b 23451 %vec = insertelement <8 x double> undef, double %load, i32 0 23452 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 23453 %2 = fcmp oeq <8 x double> %0, %1 23454 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23455 %4 = bitcast <32 x i1> %3 to i32 
23456 ret i32 %4 23457} 23458 23459define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23460; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: 23461; VLX: # %bb.0: # %entry 23462; VLX-NEXT: kmovd %edi, %k1 23463; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23464; VLX-NEXT: kmovd %k0, %eax 23465; VLX-NEXT: vzeroupper 23466; VLX-NEXT: retq 23467; 23468; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: 23469; NoVLX: # %bb.0: # %entry 23470; NoVLX-NEXT: kmovw %edi, %k1 23471; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 23472; NoVLX-NEXT: kmovw %k0, %eax 23473; NoVLX-NEXT: vzeroupper 23474; NoVLX-NEXT: retq 23475entry: 23476 %0 = bitcast <8 x i64> %__a to <8 x double> 23477 %1 = bitcast <8 x i64> %__b to <8 x double> 23478 %2 = fcmp oeq <8 x double> %0, %1 23479 %3 = bitcast i8 %__u to <8 x i1> 23480 %4 = and <8 x i1> %2, %3 23481 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23482 %6 = bitcast <32 x i1> %5 to i32 23483 ret i32 %6 23484} 23485 23486define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 23487; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: 23488; VLX: # %bb.0: # %entry 23489; VLX-NEXT: kmovd %edi, %k1 23490; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 23491; VLX-NEXT: kmovd %k0, %eax 23492; VLX-NEXT: vzeroupper 23493; VLX-NEXT: retq 23494; 23495; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: 23496; NoVLX: # %bb.0: # %entry 23497; NoVLX-NEXT: kmovw %edi, %k1 23498; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 23499; NoVLX-NEXT: kmovw %k0, %eax 23500; NoVLX-NEXT: vzeroupper 23501; NoVLX-NEXT: retq 23502entry: 23503 %0 = 
bitcast <8 x i64> %__a to <8 x double> 23504 %load = load <8 x i64>, <8 x i64>* %__b 23505 %1 = bitcast <8 x i64> %load to <8 x double> 23506 %2 = fcmp oeq <8 x double> %0, %1 23507 %3 = bitcast i8 %__u to <8 x i1> 23508 %4 = and <8 x i1> %2, %3 23509 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23510 %6 = bitcast <32 x i1> %5 to i32 23511 ret i32 %6 23512} 23513 23514define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr { 23515; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: 23516; VLX: # %bb.0: # %entry 23517; VLX-NEXT: kmovd %edi, %k1 23518; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 23519; VLX-NEXT: kmovd %k0, %eax 23520; VLX-NEXT: vzeroupper 23521; VLX-NEXT: retq 23522; 23523; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: 23524; NoVLX: # %bb.0: # %entry 23525; NoVLX-NEXT: kmovw %edi, %k1 23526; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 23527; NoVLX-NEXT: kmovw %k0, %eax 23528; NoVLX-NEXT: vzeroupper 23529; NoVLX-NEXT: retq 23530entry: 23531 %0 = bitcast <8 x i64> %__a to <8 x double> 23532 %load = load double, double* %__b 23533 %vec = insertelement <8 x double> undef, double %load, i32 0 23534 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 23535 %2 = fcmp oeq <8 x double> %0, %1 23536 %3 = bitcast i8 %__u to <8 x i1> 23537 %4 = and <8 x i1> %2, %3 23538 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23539 %6 = bitcast <32 x i1> %5 to i32 23540 ret i32 %6 23541} 23542 23543 23544 23545define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23546; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: 23547; VLX: # %bb.0: # %entry 23548; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23549; VLX-NEXT: kmovb %k0, %eax 23550; VLX-NEXT: vzeroupper 23551; VLX-NEXT: retq 23552; 23553; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: 23554; NoVLX: # %bb.0: # %entry 23555; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23556; NoVLX-NEXT: kmovw %k0, %eax 23557; NoVLX-NEXT: movzbl %al, %eax 23558; NoVLX-NEXT: vzeroupper 23559; NoVLX-NEXT: retq 23560entry: 23561 %0 = bitcast <8 x i64> %__a to <8 x double> 23562 %1 = bitcast <8 x i64> %__b to <8 x double> 23563 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8) 23564 %3 = bitcast <8 x i1> %2 to i8 23565 %4 = zext i8 %3 to i32 23566 ret i32 %4 23567} 23568 23569define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23570; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask: 23571; VLX: # %bb.0: # %entry 23572; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23573; VLX-NEXT: kmovd %k0, %eax 23574; VLX-NEXT: andb %dil, %al 23575; VLX-NEXT: movzbl %al, %eax 23576; VLX-NEXT: vzeroupper 23577; VLX-NEXT: retq 23578; 23579; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask: 23580; NoVLX: # %bb.0: # %entry 23581; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 23582; NoVLX-NEXT: kmovw %k0, %eax 23583; NoVLX-NEXT: andb %dil, %al 23584; NoVLX-NEXT: movzbl %al, %eax 23585; NoVLX-NEXT: vzeroupper 23586; NoVLX-NEXT: retq 23587entry: 23588 %0 = bitcast <8 x i64> %__a to <8 x double> 23589 %1 = bitcast <8 x i64> %__b to <8 x double> 23590 %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8) 
23591 %3 = bitcast i8 %__u to <8 x i1> 23592 %4 = and <8 x i1> %2, %3 23593 %5 = bitcast <8 x i1> %4 to i8 23594 %6 = zext i8 %5 to i32 23595 ret i32 %6 23596} 23597 23598 23599 23600define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 23601; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: 23602; VLX: # %bb.0: # %entry 23603; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23604; VLX-NEXT: kmovq %k0, %rax 23605; VLX-NEXT: vzeroupper 23606; VLX-NEXT: retq 23607; 23608; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: 23609; NoVLX: # %bb.0: # %entry 23610; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 23611; NoVLX-NEXT: kmovw %k0, %eax 23612; NoVLX-NEXT: movzwl %ax, %eax 23613; NoVLX-NEXT: vzeroupper 23614; NoVLX-NEXT: retq 23615entry: 23616 %0 = bitcast <8 x i64> %__a to <8 x double> 23617 %1 = bitcast <8 x i64> %__b to <8 x double> 23618 %2 = fcmp oeq <8 x double> %0, %1 23619 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23620 %4 = bitcast <64 x i1> %3 to i64 23621 ret i64 %4 23622} 23623 23624define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { 23625; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: 23626; VLX: # %bb.0: # %entry 23627; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 23628; VLX-NEXT: kmovq %k0, %rax 23629; VLX-NEXT: vzeroupper 23630; VLX-NEXT: retq 23631; 23632; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: 23633; NoVLX: # %bb.0: # %entry 23634; NoVLX-NEXT: vcmpeqpd (%rdi), 
%zmm0, %k0 23635; NoVLX-NEXT: kmovw %k0, %eax 23636; NoVLX-NEXT: movzwl %ax, %eax 23637; NoVLX-NEXT: vzeroupper 23638; NoVLX-NEXT: retq 23639entry: 23640 %0 = bitcast <8 x i64> %__a to <8 x double> 23641 %load = load <8 x i64>, <8 x i64>* %__b 23642 %1 = bitcast <8 x i64> %load to <8 x double> 23643 %2 = fcmp oeq <8 x double> %0, %1 23644 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23645 %4 = bitcast <64 x i1> %3 to i64 23646 ret i64 %4 23647} 23648 23649define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { 23650; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: 23651; VLX: # %bb.0: # %entry 23652; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23653; VLX-NEXT: kmovq %k0, %rax 23654; VLX-NEXT: vzeroupper 23655; VLX-NEXT: retq 23656; 23657; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: 23658; NoVLX: # %bb.0: # %entry 23659; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 23660; NoVLX-NEXT: kmovw %k0, %eax 23661; NoVLX-NEXT: movzwl %ax, %eax 23662; NoVLX-NEXT: vzeroupper 23663; NoVLX-NEXT: retq 23664entry: 23665 %0 = bitcast <8 x i64> %__a to <8 x double> 23666 %load = load double, double* %__b 23667 %vec = insertelement <8 x double> undef, double %load, i32 0 23668 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 23669 %2 = fcmp oeq <8 x double> %0, %1 23670 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked ordered-equal compare of two <8 x double> register operands.
; The i8 argument %__u is bitcast to <8 x i1> and ANDed with the fcmp result in
; IR; the expected assembly has it applied as the {%k1} write-mask of the
; compare. The <8 x i1> result is widened to <64 x i1> by a shufflevector whose
; second operand is zeroinitializer (indices 8-15 select zero lanes), so only
; the low 8 bits of the returned i64 can be set.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Same masked ordered-equal compare, but the second operand is a full
; <8 x i64> vector loaded through the pointer %__b. The expected assembly has
; the load folded into the compare as the (%rsi) memory operand.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Broadcast variant: a single double is loaded from %__b and splatted to all
; eight lanes (insertelement into lane 0 followed by an all-zero-index
; shufflevector) before the masked compare. The expected assembly uses the
; (%rsi){1to8} embedded-broadcast memory operand.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, double* %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}



; Unmasked compare through the @llvm.x86.avx512.cmp.pd.512 intrinsic with
; immediate operands (i32 2, i32 8); the expected assembly is the {sae} form of
; the compare. The <8 x i1> result is bitcast to i8 and zero-extended to i64.
; NOTE(review): the function name says "oeq" but the expected instruction is
; vcmplepd (predicate operand 2) - confirm the predicate is intentional before
; regenerating the assertions.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
  %3 = bitcast <8 x i1> %2 to i8
  %4 = zext i8 %3 to i64
  ret i64 %4
}

; Masked counterpart of the {sae} intrinsic compare: the intrinsic result is
; ANDed with %__u (bitcast to <8 x i1>) in IR. In the expected assembly the
; mask is applied with a scalar `andb %dil, %al` after the k-register is moved
; to a GPR, not as a {%k1} write-mask on the compare.
; NOTE(review): as with the unmasked variant, the name says "oeq" while the
; expected instruction is vcmplepd - confirm this is intentional.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb %dil, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andb %dil, %al
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i32 8)
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = bitcast <8 x i1> %4 to i8
  %6 = zext i8 %5 to i64
  ret i64 %6
}

; Test that we understand that cmpps with rounding zeros the upper bits of the mask register.
; The <16 x i1> intrinsic result is widened to <32 x i1> with zero upper lanes
; (shuffle indices 16-31 select from zeroinitializer) and returned as i32. The
; expected assembly contains only the compare and a k-register move, with no
; separate instruction to clear the upper bits.
define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
; VLX-LABEL: test_cmpm_rnd_zero:
; VLX: # %bb.0:
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_cmpm_rnd_zero:
; NoVLX: # %bb.0:
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
  %res = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i32 8)
  %1 = bitcast <16 x i1> %res to i16
  %cast = bitcast i16 %1 to <16 x i1>
  %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cast2 = bitcast <32 x i1> %shuffle to i32
  ret i32 %cast2
}

; Builds an 8-bit mask whose low four lanes are zero and whose high four lanes
; hold the result of `icmp ne %a, 0`: shuffle indices 4-7 come first and select
; from the zeroinitializer operand, indices 0-3 follow and select the compare
; lanes. The expected assembly places the bits with k-register shifts: a single
; kshiftlb $4 under the +avx512vl/bw/dq RUN line, and a kshiftlw $12 /
; kshiftrw $8 pair under the +avx512f-only RUN line.
define i8 @mask_zero_lower(<4 x i32> %a) {
; VLX-LABEL: mask_zero_lower:
; VLX: # %bb.0:
; VLX-NEXT: vptestmd %xmm0, %xmm0, %k0
; VLX-NEXT: kshiftlb $4, %k0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: mask_zero_lower:
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
  %cmp = icmp ne <4 x i32> %a, zeroinitializer
  %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %cast = bitcast <8 x i1> %concat to i8
  ret i8 %cast
}
