; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW

; Every function in this file has the same shape: two element-wise vector
; compares (%a against %b, %c against %d) on 512-bit vector types, an 'and' of
; the two <N x i1> results, and a bitcast of that <N x i1> value to an N-bit
; integer which is returned. The check lines record the x86-64 code llc emits
; for that pattern under each feature set named in the run lines above.
; Do not edit the check lines by hand; regenerate them with the script named
; in the first line of this file.

; Signed greater-than over <8 x i64>; the <8 x i1> result is returned as i8.
define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
; SSE-LABEL: v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq %xmm7, %xmm3
; SSE-NEXT:    pcmpgtq %xmm6, %xmm2
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    pcmpgtq %xmm5, %xmm1
; SSE-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packssdw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtq {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packssdw %xmm10, %xmm8
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    packsswb %xmm8, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm9
; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpackssdw %xmm8, %xmm5, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpackssdw %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm8, %ymm4, %ymm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm5
; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm4
; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <8 x i64> %a, %b
  %x1 = icmp sgt <8 x i64> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Ordered greater-than over <8 x double>; the <8 x i1> result is returned as i8.
define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
; SSE-LABEL: v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    packssdw %xmm7, %xmm6
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm5, %xmm4
; SSE-NEXT:    packssdw %xmm6, %xmm4
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    cmpltpd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packssdw %xmm10, %xmm8
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    packsswb %xmm8, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $al killed $al killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm5
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm7
; AVX1-NEXT:    vpackssdw %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm4
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm6
; AVX1-NEXT:    vpackssdw %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
; AVX1-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v8f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm5
; AVX2-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm4
; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX2-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltpd %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

; Signed greater-than over <32 x i16>; the <32 x i1> result is returned as i32.
define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) {
; SSE-LABEL: v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw %xmm5, %xmm1
; SSE-NEXT:    pcmpgtw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpgtw %xmm7, %xmm3
; SSE-NEXT:    pcmpgtw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packsswb %xmm11, %xmm10
; SSE-NEXT:    pand %xmm0, %xmm10
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtw {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packsswb %xmm9, %xmm8
; SSE-NEXT:    pand %xmm2, %xmm8
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm8, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpacksswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vpacksswb %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT:    vpcmpgtw %ymm4, %ymm5, %ymm4
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm3, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpcmpgtw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    shll $16, %eax
; AVX512F-NEXT:    orl %ecx, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtw %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <32 x i16> %a, %b
  %x1 = icmp sgt <32 x i16> %c, %d
  %y = and <32 x i1> %x0, %x1
  %res = bitcast <32 x i1> %y to i32
  ret i32 %res
}

; Signed greater-than over <16 x i32>; the <16 x i1> result is returned as i16.
define i16 @v16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) {
; SSE-LABEL: v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd %xmm7, %xmm3
; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
; SSE-NEXT:    packssdw %xmm3, %xmm2
; SSE-NEXT:    pcmpgtd %xmm5, %xmm1
; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pcmpgtd {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packsswb %xmm10, %xmm8
; SSE-NEXT:    pand %xmm0, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtd %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm5, %xmm2
; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm4, %xmm3
; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtd %ymm7, %ymm5, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtd %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512F-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtd %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <16 x i32> %a, %b
  %x1 = icmp sgt <16 x i32> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; Ordered greater-than over <16 x float>; the <16 x i1> result is returned as
; i16. The two AVX run lines share one set of checks for this function.
define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
; SSE-LABEL: v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    packssdw %xmm7, %xmm6
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    packssdw %xmm5, %xmm4
; SSE-NEXT:    packsswb %xmm6, %xmm4
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    packssdw %xmm11, %xmm10
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    cmpltps {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    packssdw %xmm9, %xmm8
; SSE-NEXT:    packsswb %xmm10, %xmm8
; SSE-NEXT:    pand %xmm4, %xmm8
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX12-LABEL: v16f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vcmpltps %ymm5, %ymm7, %ymm1
; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX12-NEXT:    vcmpltps %ymm4, %ymm6, %ymm2
; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
; AVX12-NEXT:    vpacksswb %xmm1, %xmm2, %xmm1
; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    vzeroupper
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm3, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = fcmp ogt <16 x float> %a, %b
  %x1 = fcmp ogt <16 x float> %c, %d
  %y = and <16 x i1> %x0, %x1
  %res = bitcast <16 x i1> %y to i16
  ret i16 %res
}

; Signed greater-than over <64 x i8>; the <64 x i1> result is returned as i64.
define i64 @v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
; SSE-LABEL: v64i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pcmpgtb %xmm5, %xmm1
; SSE-NEXT:    pcmpgtb %xmm6, %xmm2
; SSE-NEXT:    pcmpgtb %xmm7, %xmm3
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm11
; SSE-NEXT:    pand %xmm0, %xmm11
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm10
; SSE-NEXT:    pand %xmm1, %xmm10
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm9
; SSE-NEXT:    pand %xmm2, %xmm9
; SSE-NEXT:    pcmpgtb {{[0-9]+}}(%rsp), %xmm8
; SSE-NEXT:    pand %xmm3, %xmm8
; SSE-NEXT:    pmovmskb %xmm11, %eax
; SSE-NEXT:    pmovmskb %xmm10, %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pmovmskb %xmm9, %edx
; SSE-NEXT:    pmovmskb %xmm8, %eax
; SSE-NEXT:    shll $16, %eax
; SSE-NEXT:    orl %edx, %eax
; SSE-NEXT:    shlq $32, %rax
; SSE-NEXT:    orq %rcx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
; AVX1-NEXT:    vpcmpgtb %xmm8, %xmm9, %xmm8
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm9
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpand %xmm2, %xmm8, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm7, %xmm5, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm9, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm7
; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm7, %xmm5
; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm4, %xmm4
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm3, %edx
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm6, %ymm4, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpgtb %ymm7, %ymm5, %ymm2
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    vpmovmskb %ymm1, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT:    vpcmpgtb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm3, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm5
; AVX512F-NEXT:    vpcmpgtb %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    vpand %ymm1, %ymm4, %ymm1
; AVX512F-NEXT:    vpcmpgtb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovmskb %ymm0, %ecx
; AVX512F-NEXT:    vpmovmskb %ymm1, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpcmpgtb %zmm3, %zmm2, %k0 {%k1}
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %x0 = icmp sgt <64 x i8> %a, %b
  %x1 = icmp sgt <64 x i8> %c, %d
  %y = and <64 x i1> %x0, %x1
  %res = bitcast <64 x i1> %y to i64
  ret i64 %res
}