; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Each function below computes (a > b) & (c > d) on a 512-bit vector and
; bitcasts the resulting <N x i1> mask to an iN scalar.  The autogenerated
; CHECK lines pin the expected lowering per subtarget: pack/shuffle + pmovmskb
; on SSE/AVX, and mask-register (k-reg) compares on AVX512.
; NOTE(review): CHECK bodies are machine-generated - regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.

; <8 x i64> signed-greater-than masks, ANDed, bitcast to i8.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; <8 x double> ordered-greater-than masks (lowered as cmpltpd with operands
; swapped), ANDed, bitcast to i8.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm7[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    shufps {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm6[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    shufps {{.*#+}} xmm11 = xmm11[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm11[0,1,0,2,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm10[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm9[0,2,2,3,4,5,6,7]
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    shufps {{.*#+}} xmm8 = xmm8[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm8[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; <32 x i16> signed-greater-than masks, ANDed, bitcast to i32; the mask is
; built in two 16-bit halves combined via shll $16 / orl on non-AVX512BW.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packsswb %xmm11, %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packsswb %xmm9, %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm1
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

; <16 x i32> signed-greater-than masks, ANDed, bitcast to i16.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm7 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    pshufb %xmm7, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    pshufb %xmm3, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm7, %xmm11
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm7, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm3, %xmm10
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm3, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; <16 x float> ordered-greater-than masks (cmpltps with swapped operands),
; ANDed, bitcast to i16.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm3 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    pshufb %xmm3, %xmm6
; SSE-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE-NEXT:    pshufb %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    pshufb %xmm1, %xmm4
; SSE-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pshufb %xmm3, %xmm11
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pshufb %xmm3, %xmm9
; SSE-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm11[0],xmm9[1],xmm11[1]
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pshufb %xmm1, %xmm10
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pshufb %xmm1, %xmm8
; SSE-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm10[0],xmm8[1],xmm10[1]
; SSE-NEXT:    pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm9[4,5,6,7]
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v16f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; <64 x i8> signed-greater-than masks, ANDed, bitcast to i64; the mask is
; assembled from four 16-bit pmovmskb results via shll/shlq + or.
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; SSE-LABEL: v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pand %xmm3, %xmm9
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pand %xmm1, %xmm11
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pmovmskb %xmm10, %eax
; SSE-NEXT:    pmovmskb %xmm11, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %edx
; SSE-NEXT:    pmovmskb %xmm9, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %edx, %eax
; SSE-NEXT:    shlq $32, %rax
; SSE-NEXT:    orq %rcx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtb %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm9
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm8, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm7, %xmm5, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm9, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm7, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm3, %edx
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k3
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k4
; AVX512F-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm0
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm3
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm2
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0 {%k4}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0 {%k3}
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    shll $16, %ecx
; AVX512F-NEXT:    orl %eax, %ecx
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT:    kmovw %k0, %edx
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %edx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <64 x i8> %a, %b
  %x1 = icmp sgt <64 x i8> %c, %d
  %y = and <64 x i1> %x0, %x1
  %res = bitcast <64 x i1> %y to i64
  ret i64 %res
}